author	Thomas Gleixner <tglx@linutronix.de>	2018-02-16 09:47:26 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2018-02-16 09:47:26 -0500
commit	6dee6ae9d62642e81def4d461d71f13a6496ab59 (patch)
tree	6c75d416c427a59f190e197ad83fe59b7bebf656 /net
parent	1beaeacdc88b537703d04d5536235d0bbb36db93 (diff)
parent	0b24a0bbe2147815d982d9335c41bb10c04f40bc (diff)
Merge tag 'irqchip-4.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/urgent
Pull irqchip updates for 4.16-rc2 from Marc Zyngier:

- A MIPS GIC fix for spurious, masked interrupts
- A fix for a subtle IPI bug in GICv3
- Do not probe GICv3 ITSs that are marked as disabled
- Multi-MSI support for GICv2m
- Various cleanups
Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlanproc.c | 2
-rw-r--r-- net/9p/trans_fd.c | 82
-rw-r--r-- net/Kconfig | 19
-rw-r--r-- net/Makefile | 1
-rw-r--r-- net/appletalk/aarp.c | 1
-rw-r--r-- net/appletalk/atalk_proc.c | 3
-rw-r--r-- net/atm/br2684.c | 1
-rw-r--r-- net/atm/common.c | 14
-rw-r--r-- net/atm/common.h | 2
-rw-r--r-- net/atm/lec.c | 1
-rw-r--r-- net/atm/mpc.c | 9
-rw-r--r-- net/atm/mpoa_caches.c | 48
-rw-r--r-- net/atm/mpoa_caches.h | 9
-rw-r--r-- net/atm/mpoa_proc.c | 16
-rw-r--r-- net/atm/proc.c | 1
-rw-r--r-- net/ax25/af_ax25.c | 1
-rw-r--r-- net/ax25/ax25_route.c | 1
-rw-r--r-- net/ax25/ax25_uid.c | 1
-rw-r--r-- net/batman-adv/Kconfig | 17
-rw-r--r-- net/batman-adv/Makefile | 2
-rw-r--r-- net/batman-adv/bat_algo.c | 35
-rw-r--r-- net/batman-adv/bat_algo.h | 1
-rw-r--r-- net/batman-adv/bat_iv_ogm.c | 107
-rw-r--r-- net/batman-adv/bat_iv_ogm.h | 1
-rw-r--r-- net/batman-adv/bat_v.c | 51
-rw-r--r-- net/batman-adv/bat_v.h | 1
-rw-r--r-- net/batman-adv/bat_v_elp.c | 29
-rw-r--r-- net/batman-adv/bat_v_elp.h | 1
-rw-r--r-- net/batman-adv/bat_v_ogm.c | 39
-rw-r--r-- net/batman-adv/bat_v_ogm.h | 1
-rw-r--r-- net/batman-adv/bitarray.c | 3
-rw-r--r-- net/batman-adv/bitarray.h | 10
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.c | 114
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.h | 5
-rw-r--r-- net/batman-adv/debugfs.c | 30
-rw-r--r-- net/batman-adv/debugfs.h | 1
-rw-r--r-- net/batman-adv/distributed-arp-table.c | 80
-rw-r--r-- net/batman-adv/distributed-arp-table.h | 9
-rw-r--r-- net/batman-adv/fragmentation.c | 25
-rw-r--r-- net/batman-adv/fragmentation.h | 3
-rw-r--r-- net/batman-adv/gateway_client.c | 67
-rw-r--r-- net/batman-adv/gateway_client.h | 1
-rw-r--r-- net/batman-adv/gateway_common.c | 30
-rw-r--r-- net/batman-adv/gateway_common.h | 6
-rw-r--r-- net/batman-adv/hard-interface.c | 67
-rw-r--r-- net/batman-adv/hard-interface.h | 59
-rw-r--r-- net/batman-adv/hash.c | 20
-rw-r--r-- net/batman-adv/hash.h | 28
-rw-r--r-- net/batman-adv/icmp_socket.c | 21
-rw-r--r-- net/batman-adv/icmp_socket.h | 1
-rw-r--r-- net/batman-adv/log.c | 23
-rw-r--r-- net/batman-adv/log.h | 62
-rw-r--r-- net/batman-adv/main.c | 54
-rw-r--r-- net/batman-adv/main.h | 127
-rw-r--r-- net/batman-adv/multicast.c | 83
-rw-r--r-- net/batman-adv/multicast.h | 17
-rw-r--r-- net/batman-adv/netlink.c | 27
-rw-r--r-- net/batman-adv/netlink.h | 1
-rw-r--r-- net/batman-adv/network-coding.c | 126
-rw-r--r-- net/batman-adv/network-coding.h | 1
-rw-r--r-- net/batman-adv/originator.c | 154
-rw-r--r-- net/batman-adv/originator.h | 47
-rw-r--r-- net/batman-adv/packet.h | 621
-rw-r--r-- net/batman-adv/routing.c | 56
-rw-r--r-- net/batman-adv/routing.h | 1
-rw-r--r-- net/batman-adv/send.c | 66
-rw-r--r-- net/batman-adv/send.h | 8
-rw-r--r-- net/batman-adv/soft-interface.c | 66
-rw-r--r-- net/batman-adv/soft-interface.h | 1
-rw-r--r-- net/batman-adv/sysfs.c | 58
-rw-r--r-- net/batman-adv/sysfs.h | 14
-rw-r--r-- net/batman-adv/tp_meter.c | 77
-rw-r--r-- net/batman-adv/tp_meter.h | 1
-rw-r--r-- net/batman-adv/translation-table.c | 234
-rw-r--r-- net/batman-adv/translation-table.h | 1
-rw-r--r-- net/batman-adv/tvlv.c | 43
-rw-r--r-- net/batman-adv/tvlv.h | 1
-rw-r--r-- net/batman-adv/types.h | 1996
-rw-r--r-- net/bluetooth/af_bluetooth.c | 60
-rw-r--r-- net/bluetooth/cmtp/capi.c | 1
-rw-r--r-- net/bluetooth/hci_debugfs.c | 210
-rw-r--r-- net/bluetooth/hci_request.c | 64
-rw-r--r-- net/bluetooth/hidp/core.c | 2
-rw-r--r-- net/bpf/test_run.c | 4
-rw-r--r-- net/bridge/br_device.c | 10
-rw-r--r-- net/bridge/br_fdb.c | 394
-rw-r--r-- net/bridge/br_mdb.c | 6
-rw-r--r-- net/bridge/br_netfilter_hooks.c | 2
-rw-r--r-- net/bridge/br_nf_core.c | 1
-rw-r--r-- net/bridge/br_private.h | 18
-rw-r--r-- net/bridge/br_switchdev.c | 8
-rw-r--r-- net/bridge/br_sysfs_br.c | 13
-rw-r--r-- net/bridge/netfilter/Kconfig | 2
-rw-r--r-- net/bridge/netfilter/ebtables.c | 2
-rw-r--r-- net/bridge/netfilter/nf_tables_bridge.c | 120
-rw-r--r-- net/caif/caif_socket.c | 18
-rw-r--r-- net/caif/cfctrl.c | 50
-rw-r--r-- net/caif/cfpkt_skbuff.c | 1
-rw-r--r-- net/caif/chnl_net.c | 1
-rw-r--r-- net/can/Kconfig | 2
-rw-r--r-- net/can/af_can.c | 20
-rw-r--r-- net/can/af_can.h | 2
-rw-r--r-- net/can/bcm.c | 1
-rw-r--r-- net/can/gw.c | 14
-rw-r--r-- net/can/proc.c | 14
-rw-r--r-- net/can/raw.c | 2
-rw-r--r-- net/ceph/ceph_common.c | 4
-rw-r--r-- net/core/Makefile | 2
-rw-r--r-- net/core/datagram.c | 22
-rw-r--r-- net/core/dev.c | 260
-rw-r--r-- net/core/dev_ioctl.c | 132
-rw-r--r-- net/core/devlink.c | 596
-rw-r--r-- net/core/dst.c | 14
-rw-r--r-- net/core/ethtool.c | 67
-rw-r--r-- net/core/filter.c | 359
-rw-r--r-- net/core/flow_dissector.c | 69
-rw-r--r-- net/core/gen_estimator.c | 4
-rw-r--r-- net/core/gen_stats.c | 9
-rw-r--r-- net/core/link_watch.c | 2
-rw-r--r-- net/core/neighbour.c | 1
-rw-r--r-- net/core/net-procfs.c | 4
-rw-r--r-- net/core/net-sysfs.c | 56
-rw-r--r-- net/core/net_namespace.c | 83
-rw-r--r-- net/core/pktgen.c | 281
-rw-r--r-- net/core/rtnetlink.c | 527
-rw-r--r-- net/core/skbuff.c | 81
-rw-r--r-- net/core/sock.c | 77
-rw-r--r-- net/core/sock_reuseport.c | 39
-rw-r--r-- net/core/stream.c | 4
-rw-r--r-- net/core/sysctl_net_core.c | 60
-rw-r--r-- net/core/xdp.c | 73
-rw-r--r-- net/dccp/Kconfig | 17
-rw-r--r-- net/dccp/Makefile | 5
-rw-r--r-- net/dccp/ackvec.c | 2
-rw-r--r-- net/dccp/dccp.h | 2
-rw-r--r-- net/dccp/minisocks.c | 7
-rw-r--r-- net/dccp/probe.c | 203
-rw-r--r-- net/dccp/proto.c | 23
-rw-r--r-- net/dccp/trace.h | 84
-rw-r--r-- net/decnet/af_decnet.c | 7
-rw-r--r-- net/decnet/dn_dev.c | 10
-rw-r--r-- net/decnet/dn_fib.c | 6
-rw-r--r-- net/decnet/dn_neigh.c | 1
-rw-r--r-- net/decnet/dn_route.c | 43
-rw-r--r-- net/dns_resolver/dns_query.c | 22
-rw-r--r-- net/dsa/Kconfig | 9
-rw-r--r-- net/dsa/Makefile | 3
-rw-r--r-- net/dsa/dsa2.c | 9
-rw-r--r-- net/dsa/dsa_priv.h | 13
-rw-r--r-- net/dsa/legacy.c | 24
-rw-r--r-- net/dsa/port.c | 103
-rw-r--r-- net/dsa/slave.c | 25
-rw-r--r-- net/dsa/switch.c | 111
-rw-r--r-- net/dsa/tag_brcm.c | 12
-rw-r--r-- net/dsa/tag_mtk.c | 38
-rw-r--r-- net/ipv4/Makefile | 1
-rw-r--r-- net/ipv4/af_inet.c | 47
-rw-r--r-- net/ipv4/arp.c | 1
-rw-r--r-- net/ipv4/devinet.c | 61
-rw-r--r-- net/ipv4/esp4.c | 36
-rw-r--r-- net/ipv4/esp4_offload.c | 75
-rw-r--r-- net/ipv4/fib_frontend.c | 8
-rw-r--r-- net/ipv4/fib_trie.c | 3
-rw-r--r-- net/ipv4/igmp.c | 6
-rw-r--r-- net/ipv4/inet_connection_sock.c | 9
-rw-r--r-- net/ipv4/inet_diag.c | 8
-rw-r--r-- net/ipv4/inet_hashtables.c | 188
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 31
-rw-r--r-- net/ipv4/ip_gre.c | 165
-rw-r--r-- net/ipv4/ip_sockglue.c | 20
-rw-r--r-- net/ipv4/ip_tunnel.c | 13
-rw-r--r-- net/ipv4/ipconfig.c | 48
-rw-r--r-- net/ipv4/ipmr.c | 4
-rw-r--r-- net/ipv4/netfilter.c | 62
-rw-r--r-- net/ipv4/netfilter/Kconfig | 12
-rw-r--r-- net/ipv4/netfilter/Makefile | 9
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 34
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 34
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 17
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c | 6
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c | 5
-rw-r--r-- net/ipv4/netfilter/iptable_nat.c | 4
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c | 37
-rw-r--r-- net/ipv4/netfilter/iptable_security.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 13
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_flow_table_ipv4.c | 285
-rw-r--r-- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 10
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.asn1 | 177
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 1286
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic_main.c | 235
-rw-r--r-- net/ipv4/netfilter/nf_tables_arp.c | 62
-rw-r--r-- net/ipv4/netfilter/nf_tables_ipv4.c | 83
-rw-r--r-- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 3
-rw-r--r-- net/ipv4/netfilter/nft_chain_route_ipv4.c | 8
-rw-r--r-- net/ipv4/proc.c | 3
-rw-r--r-- net/ipv4/raw.c | 18
-rw-r--r-- net/ipv4/route.c | 5
-rw-r--r-- net/ipv4/tcp.c | 111
-rw-r--r-- net/ipv4/tcp_bbr.c | 9
-rw-r--r-- net/ipv4/tcp_diag.c | 2
-rw-r--r-- net/ipv4/tcp_fastopen.c | 30
-rw-r--r-- net/ipv4/tcp_input.c | 60
-rw-r--r-- net/ipv4/tcp_ipv4.c | 8
-rw-r--r-- net/ipv4/tcp_metrics.c | 7
-rw-r--r-- net/ipv4/tcp_minisocks.c | 7
-rw-r--r-- net/ipv4/tcp_nv.c | 4
-rw-r--r-- net/ipv4/tcp_output.c | 18
-rw-r--r-- net/ipv4/tcp_probe.c | 301
-rw-r--r-- net/ipv4/tcp_timer.c | 24
-rw-r--r-- net/ipv4/tcp_ulp.c | 59
-rw-r--r-- net/ipv4/udp.c | 72
-rw-r--r-- net/ipv4/udplite.c | 1
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c | 7
-rw-r--r-- net/ipv6/addrconf.c | 90
-rw-r--r-- net/ipv6/addrlabel.c | 25
-rw-r--r-- net/ipv6/af_inet6.c | 11
-rw-r--r-- net/ipv6/anycast.c | 1
-rw-r--r-- net/ipv6/datagram.c | 3
-rw-r--r-- net/ipv6/esp6.c | 36
-rw-r--r-- net/ipv6/esp6_offload.c | 82
-rw-r--r-- net/ipv6/ila/ila_xlat.c | 4
-rw-r--r-- net/ipv6/inet6_hashtables.c | 77
-rw-r--r-- net/ipv6/ip6_fib.c | 66
-rw-r--r-- net/ipv6/ip6_flowlabel.c | 1
-rw-r--r-- net/ipv6/ip6_gre.c | 631
-rw-r--r-- net/ipv6/ip6_output.c | 15
-rw-r--r-- net/ipv6/ip6_tunnel.c | 5
-rw-r--r-- net/ipv6/ip6_vti.c | 20
-rw-r--r-- net/ipv6/ip6mr.c | 12
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 17
-rw-r--r-- net/ipv6/mcast.c | 12
-rw-r--r-- net/ipv6/ndisc.c | 5
-rw-r--r-- net/ipv6/netfilter.c | 44
-rw-r--r-- net/ipv6/netfilter/Kconfig | 17
-rw-r--r-- net/ipv6/netfilter/Makefile | 4
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 34
-rw-r--r-- net/ipv6/netfilter/ip6t_srh.c | 161
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c | 8
-rw-r--r-- net/ipv6/netfilter/ip6table_nat.c | 4
-rw-r--r-- net/ipv6/netfilter/ip6table_raw.c | 31
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 25
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c | 16
-rw-r--r-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 3
-rw-r--r-- net/ipv6/netfilter/nf_flow_table_ipv6.c | 278
-rw-r--r-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 8
-rw-r--r-- net/ipv6/netfilter/nf_tables_ipv6.c | 82
-rw-r--r-- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 3
-rw-r--r-- net/ipv6/netfilter/nft_chain_route_ipv6.c | 3
-rw-r--r-- net/ipv6/netfilter/nft_fib_ipv6.c | 12
-rw-r--r-- net/ipv6/proc.c | 3
-rw-r--r-- net/ipv6/raw.c | 3
-rw-r--r-- net/ipv6/route.c | 569
-rw-r--r-- net/ipv6/seg6.c | 4
-rw-r--r-- net/ipv6/seg6_local.c | 2
-rw-r--r-- net/ipv6/tcp_ipv6.c | 9
-rw-r--r-- net/ipv6/udp.c | 55
-rw-r--r-- net/ipv6/udplite.c | 1
-rw-r--r-- net/ipv6/xfrm6_mode_tunnel.c | 7
-rw-r--r-- net/ipv6/xfrm6_policy.c | 2
-rw-r--r-- net/ipx/Kconfig | 60
-rw-r--r-- net/ipx/Makefile | 8
-rw-r--r-- net/ipx/af_ipx.c | 2084
-rw-r--r-- net/ipx/ipx_proc.c | 341
-rw-r--r-- net/ipx/ipx_route.c | 293
-rw-r--r-- net/ipx/pe2.c | 36
-rw-r--r-- net/ipx/sysctl_net_ipx.c | 40
-rw-r--r-- net/iucv/af_iucv.c | 24
-rw-r--r-- net/kcm/kcmproc.c | 2
-rw-r--r-- net/kcm/kcmsock.c | 6
-rw-r--r-- net/l2tp/l2tp_core.c | 54
-rw-r--r-- net/l2tp/l2tp_core.h | 16
-rw-r--r-- net/l2tp/l2tp_debugfs.c | 4
-rw-r--r-- net/l2tp/l2tp_netlink.c | 39
-rw-r--r-- net/l2tp/l2tp_ppp.c | 1
-rw-r--r-- net/llc/llc_proc.c | 2
-rw-r--r-- net/mac80211/agg-rx.c | 26
-rw-r--r-- net/mac80211/agg-tx.c | 34
-rw-r--r-- net/mac80211/cfg.c | 31
-rw-r--r-- net/mac80211/debugfs.c | 1
-rw-r--r-- net/mac80211/debugfs_sta.c | 4
-rw-r--r-- net/mac80211/driver-ops.h | 3
-rw-r--r-- net/mac80211/ht.c | 1
-rw-r--r-- net/mac80211/ieee80211_i.h | 4
-rw-r--r-- net/mac80211/iface.c | 4
-rw-r--r-- net/mac80211/key.c | 12
-rw-r--r-- net/mac80211/main.c | 3
-rw-r--r-- net/mac80211/mesh.c | 2
-rw-r--r-- net/mac80211/mesh_hwmp.c | 1
-rw-r--r-- net/mac80211/mesh_pathtbl.c | 34
-rw-r--r-- net/mac80211/mesh_plink.c | 2
-rw-r--r-- net/mac80211/mlme.c | 10
-rw-r--r-- net/mac80211/offchannel.c | 4
-rw-r--r-- net/mac80211/rx.c | 17
-rw-r--r-- net/mac80211/tdls.c | 6
-rw-r--r-- net/mac80211/tx.c | 4
-rw-r--r-- net/mac80211/util.c | 19
-rw-r--r-- net/mac80211/wme.c | 1
-rw-r--r-- net/mac80211/wpa.c | 16
-rw-r--r-- net/mpls/af_mpls.c | 39
-rw-r--r-- net/ncsi/ncsi-aen.c | 35
-rw-r--r-- net/netfilter/Kconfig | 35
-rw-r--r-- net/netfilter/Makefile | 9
-rw-r--r-- net/netfilter/core.c | 263
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_gen.h | 10
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c | 8
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 8
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_port.c | 8
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 36
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h | 38
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 26
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c | 9
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c | 9
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netnet.c | 28
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c | 19
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c | 35
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c | 21
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c | 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 12
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c | 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 9
-rw-r--r-- net/netfilter/nf_conncount.c | 373
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 28
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 1
-rw-r--r-- net/netfilter/nf_conntrack_h323_asn1.c | 40
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 77
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 22
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 18
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c | 21
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 21
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 25
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c | 10
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 14
-rw-r--r-- net/netfilter/nf_flow_table.c | 453
-rw-r--r-- net/netfilter/nf_flow_table_inet.c | 49
-rw-r--r-- net/netfilter/nf_internals.h | 2
-rw-r--r-- net/netfilter/nf_log.c | 1
-rw-r--r-- net/netfilter/nf_queue.c | 96
-rw-r--r-- net/netfilter/nf_synproxy_core.c | 1
-rw-r--r-- net/netfilter/nf_tables_api.c | 1653
-rw-r--r-- net/netfilter/nf_tables_inet.c | 88
-rw-r--r-- net/netfilter/nf_tables_netdev.c | 87
-rw-r--r-- net/netfilter/nfnetlink.c | 4
-rw-r--r-- net/netfilter/nfnetlink_acct.c | 2
-rw-r--r-- net/netfilter/nfnetlink_cttimeout.c | 2
-rw-r--r-- net/netfilter/nfnetlink_log.c | 1
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 10
-rw-r--r-- net/netfilter/nft_cmp.c | 2
-rw-r--r-- net/netfilter/nft_compat.c | 26
-rw-r--r-- net/netfilter/nft_ct.c | 16
-rw-r--r-- net/netfilter/nft_dynset.c | 4
-rw-r--r-- net/netfilter/nft_flow_offload.c | 242
-rw-r--r-- net/netfilter/nft_log.c | 4
-rw-r--r-- net/netfilter/nft_masq.c | 2
-rw-r--r-- net/netfilter/nft_meta.c | 45
-rw-r--r-- net/netfilter/nft_nat.c | 2
-rw-r--r-- net/netfilter/nft_redir.c | 2
-rw-r--r-- net/netfilter/nft_rt.c | 15
-rw-r--r-- net/netfilter/nft_set_hash.c | 10
-rw-r--r-- net/netfilter/utils.c | 90
-rw-r--r-- net/netfilter/x_tables.c | 57
-rw-r--r-- net/netfilter/xt_IDLETIMER.c | 1
-rw-r--r-- net/netfilter/xt_LED.c | 1
-rw-r--r-- net/netfilter/xt_RATEEST.c | 22
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 5
-rw-r--r-- net/netfilter/xt_addrtype.c | 15
-rw-r--r-- net/netfilter/xt_cgroup.c | 1
-rw-r--r-- net/netfilter/xt_connlimit.c | 369
-rw-r--r-- net/netfilter/xt_hashlimit.c | 5
-rw-r--r-- net/netfilter/xt_ipcomp.c | 2
-rw-r--r-- net/netfilter/xt_limit.c | 3
-rw-r--r-- net/netfilter/xt_nfacct.c | 1
-rw-r--r-- net/netfilter/xt_policy.c | 3
-rw-r--r-- net/netfilter/xt_set.c | 119
-rw-r--r-- net/netfilter/xt_statistic.c | 1
-rw-r--r-- net/netlink/af_netlink.c | 72
-rw-r--r-- net/netlink/diag.c | 8
-rw-r--r-- net/netlink/genetlink.c | 12
-rw-r--r-- net/netrom/af_netrom.c | 1
-rw-r--r-- net/netrom/nr_route.c | 2
-rw-r--r-- net/nfc/llcp_sock.c | 22
-rw-r--r-- net/nfc/nci/uart.c | 2
-rw-r--r-- net/openvswitch/conntrack.c | 38
-rw-r--r-- net/openvswitch/flow.c | 6
-rw-r--r-- net/openvswitch/flow_netlink.c | 56
-rw-r--r-- net/openvswitch/meter.c | 2
-rw-r--r-- net/openvswitch/vport-internal_dev.c | 10
-rw-r--r-- net/packet/af_packet.c | 12
-rw-r--r-- net/phonet/pn_netlink.c | 21
-rw-r--r-- net/phonet/socket.c | 16
-rw-r--r-- net/qrtr/qrtr.c | 8
-rw-r--r-- net/rds/af_rds.c | 20
-rw-r--r-- net/rds/bind.c | 1
-rw-r--r-- net/rds/cong.c | 10
-rw-r--r-- net/rds/connection.c | 32
-rw-r--r-- net/rds/ib.c | 26
-rw-r--r-- net/rds/ib_cm.c | 1
-rw-r--r-- net/rds/rds.h | 17
-rw-r--r-- net/rds/send.c | 37
-rw-r--r-- net/rds/tcp.c | 113
-rw-r--r-- net/rds/tcp.h | 1
-rw-r--r-- net/rds/tcp_connect.c | 2
-rw-r--r-- net/rds/tcp_recv.c | 8
-rw-r--r-- net/rds/tcp_send.c | 5
-rw-r--r-- net/rds/threads.c | 20
-rw-r--r-- net/rfkill/core.c | 6
-rw-r--r-- net/rose/af_rose.c | 1
-rw-r--r-- net/rose/rose_route.c | 3
-rw-r--r-- net/rxrpc/af_rxrpc.c | 8
-rw-r--r-- net/rxrpc/conn_client.c | 3
-rw-r--r-- net/rxrpc/conn_event.c | 1
-rw-r--r-- net/rxrpc/conn_object.c | 16
-rw-r--r-- net/rxrpc/proc.c | 2
-rw-r--r-- net/rxrpc/rxkad.c | 92
-rw-r--r-- net/sched/Kconfig | 3
-rw-r--r-- net/sched/act_api.c | 74
-rw-r--r-- net/sched/act_bpf.c | 10
-rw-r--r-- net/sched/act_connmark.c | 8
-rw-r--r-- net/sched/act_csum.c | 74
-rw-r--r-- net/sched/act_gact.c | 8
-rw-r--r-- net/sched/act_ife.c | 18
-rw-r--r-- net/sched/act_ipt.c | 18
-rw-r--r-- net/sched/act_mirred.c | 27
-rw-r--r-- net/sched/act_nat.c | 8
-rw-r--r-- net/sched/act_pedit.c | 10
-rw-r--r-- net/sched/act_police.c | 12
-rw-r--r-- net/sched/act_sample.c | 10
-rw-r--r-- net/sched/act_simple.c | 10
-rw-r--r-- net/sched/act_skbedit.c | 8
-rw-r--r-- net/sched/act_skbmod.c | 10
-rw-r--r-- net/sched/act_tunnel_key.c | 10
-rw-r--r-- net/sched/act_vlan.c | 10
-rw-r--r-- net/sched/cls_api.c | 695
-rw-r--r-- net/sched/cls_basic.c | 49
-rw-r--r-- net/sched/cls_bpf.c | 88
-rw-r--r-- net/sched/cls_cgroup.c | 12
-rw-r--r-- net/sched/cls_flow.c | 12
-rw-r--r-- net/sched/cls_flower.c | 88
-rw-r--r-- net/sched/cls_fw.c | 19
-rw-r--r-- net/sched/cls_matchall.c | 35
-rw-r--r-- net/sched/cls_route.c | 16
-rw-r--r-- net/sched/cls_rsvp.h | 9
-rw-r--r-- net/sched/cls_tcindex.c | 17
-rw-r--r-- net/sched/cls_u32.c | 212
-rw-r--r-- net/sched/sch_api.c | 305
-rw-r--r-- net/sched/sch_atm.c | 23
-rw-r--r-- net/sched/sch_cbq.c | 78
-rw-r--r-- net/sched/sch_cbs.c | 31
-rw-r--r-- net/sched/sch_choke.c | 8
-rw-r--r-- net/sched/sch_codel.c | 8
-rw-r--r-- net/sched/sch_drr.c | 40
-rw-r--r-- net/sched/sch_dsmark.c | 19
-rw-r--r-- net/sched/sch_fifo.c | 11
-rw-r--r-- net/sched/sch_fq.c | 8
-rw-r--r-- net/sched/sch_fq_codel.c | 13
-rw-r--r-- net/sched/sch_generic.c | 545
-rw-r--r-- net/sched/sch_gred.c | 13
-rw-r--r-- net/sched/sch_hfsc.c | 28
-rw-r--r-- net/sched/sch_hhf.c | 8
-rw-r--r-- net/sched/sch_htb.c | 29
-rw-r--r-- net/sched/sch_ingress.c | 99
-rw-r--r-- net/sched/sch_mq.c | 42
-rw-r--r-- net/sched/sch_mqprio.c | 76
-rw-r--r-- net/sched/sch_multiq.c | 19
-rw-r--r-- net/sched/sch_netem.c | 12
-rw-r--r-- net/sched/sch_pie.c | 8
-rw-r--r-- net/sched/sch_plug.c | 6
-rw-r--r-- net/sched/sch_prio.c | 81
-rw-r--r-- net/sched/sch_qfq.c | 22
-rw-r--r-- net/sched/sch_red.c | 65
-rw-r--r-- net/sched/sch_sfb.c | 20
-rw-r--r-- net/sched/sch_sfq.c | 8
-rw-r--r-- net/sched/sch_tbf.c | 31
-rw-r--r-- net/sched/sch_teql.c | 3
-rw-r--r-- net/sctp/Kconfig | 12
-rw-r--r-- net/sctp/Makefile | 5
-rw-r--r-- net/sctp/associola.c | 2
-rw-r--r-- net/sctp/chunk.c | 8
-rw-r--r-- net/sctp/endpointola.c | 2
-rw-r--r-- net/sctp/ipv6.c | 10
-rw-r--r-- net/sctp/output.c | 5
-rw-r--r-- net/sctp/outqueue.c | 12
-rw-r--r-- net/sctp/probe.c | 244
-rw-r--r-- net/sctp/proc.c | 7
-rw-r--r-- net/sctp/protocol.c | 10
-rw-r--r-- net/sctp/sm_make_chunk.c | 79
-rw-r--r-- net/sctp/sm_sideeffect.c | 51
-rw-r--r-- net/sctp/sm_statefuns.c | 50
-rw-r--r-- net/sctp/sm_statetable.c | 5
-rw-r--r-- net/sctp/socket.c | 253
-rw-r--r-- net/sctp/stream.c | 47
-rw-r--r-- net/sctp/stream_interleave.c | 1334
-rw-r--r-- net/sctp/stream_sched.c | 3
-rw-r--r-- net/sctp/sysctl.c | 7
-rw-r--r-- net/sctp/ulpevent.c | 15
-rw-r--r-- net/sctp/ulpqueue.c | 23
-rw-r--r-- net/smc/af_smc.c | 247
-rw-r--r-- net/smc/smc.h | 5
-rw-r--r-- net/smc/smc_cdc.c | 52
-rw-r--r-- net/smc/smc_cdc.h | 1
-rw-r--r-- net/smc/smc_clc.c | 102
-rw-r--r-- net/smc/smc_clc.h | 34
-rw-r--r-- net/smc/smc_close.c | 208
-rw-r--r-- net/smc/smc_close.h | 2
-rw-r--r-- net/smc/smc_core.c | 17
-rw-r--r-- net/smc/smc_diag.c | 6
-rw-r--r-- net/smc/smc_ib.c | 38
-rw-r--r-- net/smc/smc_rx.c | 9
-rw-r--r-- net/smc/smc_tx.c | 36
-rw-r--r-- net/smc/smc_wr.c | 50
-rw-r--r-- net/smc/smc_wr.h | 2
-rw-r--r-- net/socket.c | 307
-rw-r--r-- net/sunrpc/cache.c | 12
-rw-r--r-- net/sunrpc/clnt.c | 16
-rw-r--r-- net/sunrpc/rpc_pipe.c | 8
-rw-r--r-- net/sunrpc/sched.c | 42
-rw-r--r-- net/sunrpc/svcsock.c | 18
-rw-r--r-- net/sunrpc/xprt.c | 5
-rw-r--r-- net/sunrpc/xprtrdma/backchannel.c | 78
-rw-r--r-- net/sunrpc/xprtrdma/fmr_ops.c | 157
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c | 329
-rw-r--r-- net/sunrpc/xprtrdma/module.c | 12
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 164
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 5
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 12
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 6
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 25
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 128
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 288
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h | 116
-rw-r--r-- net/sunrpc/xprtsock.c | 63
-rw-r--r-- net/tipc/bcast.c | 12
-rw-r--r-- net/tipc/core.h | 1
-rw-r--r-- net/tipc/group.c | 371
-rw-r--r-- net/tipc/group.h | 10
-rw-r--r-- net/tipc/link.c | 2
-rw-r--r-- net/tipc/msg.c | 55
-rw-r--r-- net/tipc/msg.h | 3
-rw-r--r-- net/tipc/name_table.c | 57
-rw-r--r-- net/tipc/name_table.h | 9
-rw-r--r-- net/tipc/server.c | 80
-rw-r--r-- net/tipc/server.h | 13
-rw-r--r-- net/tipc/socket.c | 138
-rw-r--r-- net/tipc/subscr.c | 35
-rw-r--r-- net/tipc/subscr.h | 2
-rw-r--r-- net/tls/tls_main.c | 2
-rw-r--r-- net/tls/tls_sw.c | 8
-rw-r--r-- net/unix/af_unix.c | 56
-rw-r--r-- net/vmw_vsock/af_vsock.c | 34
-rw-r--r-- net/wireless/ibss.c | 5
-rw-r--r-- net/wireless/mlme.c | 6
-rw-r--r-- net/wireless/nl80211.c | 77
-rw-r--r-- net/wireless/scan.c | 5
-rw-r--r-- net/wireless/trace.h | 12
-rw-r--r-- net/wireless/wext-core.c | 13
-rw-r--r-- net/wireless/wext-proc.c | 1
-rw-r--r-- net/xfrm/xfrm_device.c | 199
-rw-r--r-- net/xfrm/xfrm_input.c | 1
-rw-r--r-- net/xfrm/xfrm_output.c | 2
-rw-r--r-- net/xfrm/xfrm_policy.c | 137
-rw-r--r-- net/xfrm/xfrm_proc.c | 1
-rw-r--r-- net/xfrm/xfrm_replay.c | 5
-rw-r--r-- net/xfrm/xfrm_state.c | 7
571 files changed, 19005 insertions(+), 15246 deletions(-)
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 5f1446c9f098..a662ccc166df 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -80,7 +80,6 @@ static int vlan_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations vlan_fops = {
-	.owner = THIS_MODULE,
 	.open = vlan_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -97,7 +96,6 @@ static int vlandev_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations vlandev_fops = {
-	.owner = THIS_MODULE,
 	.open = vlandev_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
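
The `.owner = THIS_MODULE` deletions that repeat throughout this merge come from a tree-wide cleanup: procfs has ignored the owner field for years and instead guards each method call with its own use/unuse accounting on the proc_dir_entry, so rmmod races are handled without pinning the module. A minimal sketch of the resulting registration pattern against the 4.16-era API (the demo_* names are hypothetical):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello\n");		/* contents of /proc/demo */
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

/* No .owner: the proc layer itself keeps the entry safe across unload. */
static const struct file_operations demo_fops = {
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init demo_init(void)
{
	return proc_create("demo", 0444, NULL, &demo_fops) ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
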
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 80f5c79053a4..0cfba919d167 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -228,33 +228,32 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	}
 }
 
-static int
-p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
+static __poll_t
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
 {
-	int ret, n;
+	__poll_t ret, n;
 	struct p9_trans_fd *ts = NULL;
 
 	if (client && client->status == Connected)
 		ts = client->trans;
 
-	if (!ts)
-		return -EREMOTEIO;
+	if (!ts) {
+		if (err)
+			*err = -EREMOTEIO;
+		return EPOLLERR;
+	}
 
 	if (!ts->rd->f_op->poll)
-		return -EIO;
-
-	if (!ts->wr->f_op->poll)
-		return -EIO;
-
-	ret = ts->rd->f_op->poll(ts->rd, pt);
-	if (ret < 0)
-		return ret;
+		ret = DEFAULT_POLLMASK;
+	else
+		ret = ts->rd->f_op->poll(ts->rd, pt);
 
 	if (ts->rd != ts->wr) {
-		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0)
-			return n;
-		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
+		if (!ts->wr->f_op->poll)
+			n = DEFAULT_POLLMASK;
+		else
+			n = ts->wr->f_op->poll(ts->wr, pt);
+		ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
 	}
 
 	return ret;
@@ -298,7 +297,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
 
 static void p9_read_work(struct work_struct *work)
 {
-	int n, err;
+	__poll_t n;
+	int err;
 	struct p9_conn *m;
 	int status = REQ_STATUS_ERROR;
 
@@ -396,11 +396,11 @@ end_clear:
 
 	if (!list_empty(&m->req_list)) {
 		if (test_and_clear_bit(Rpending, &m->wsched))
-			n = POLLIN;
+			n = EPOLLIN;
 		else
-			n = p9_fd_poll(m->client, NULL);
+			n = p9_fd_poll(m->client, NULL, NULL);
 
-		if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
+		if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
 			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
 			schedule_work(&m->rq);
 		}
@@ -448,7 +448,8 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 
 static void p9_write_work(struct work_struct *work)
 {
-	int n, err;
+	__poll_t n;
+	int err;
 	struct p9_conn *m;
 	struct p9_req_t *req;
 
@@ -504,11 +505,11 @@ end_clear:
 
 	if (m->wsize || !list_empty(&m->unsent_req_list)) {
 		if (test_and_clear_bit(Wpending, &m->wsched))
-			n = POLLOUT;
+			n = EPOLLOUT;
 		else
-			n = p9_fd_poll(m->client, NULL);
+			n = p9_fd_poll(m->client, NULL, NULL);
 
-		if ((n & POLLOUT) &&
+		if ((n & EPOLLOUT) &&
 		    !test_and_set_bit(Wworksched, &m->wsched)) {
 			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			schedule_work(&m->wq);
@@ -581,7 +582,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 
 static void p9_conn_create(struct p9_client *client)
 {
-	int n;
+	__poll_t n;
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = &ts->conn;
 
@@ -597,13 +598,13 @@ static void p9_conn_create(struct p9_client *client)
 	INIT_LIST_HEAD(&m->poll_pending_link);
 	init_poll_funcptr(&m->pt, p9_pollwait);
 
-	n = p9_fd_poll(client, &m->pt);
-	if (n & POLLIN) {
+	n = p9_fd_poll(client, &m->pt, NULL);
+	if (n & EPOLLIN) {
 		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		set_bit(Rpending, &m->wsched);
 	}
 
-	if (n & POLLOUT) {
+	if (n & EPOLLOUT) {
 		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
 		set_bit(Wpending, &m->wsched);
 	}
@@ -617,20 +618,19 @@ static void p9_conn_create(struct p9_client *client)
 
 static void p9_poll_mux(struct p9_conn *m)
 {
-	int n;
+	__poll_t n;
+	int err = -ECONNRESET;
 
 	if (m->err < 0)
 		return;
 
-	n = p9_fd_poll(m->client, NULL);
-	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+	n = p9_fd_poll(m->client, NULL, &err);
+	if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
 		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
-		if (n >= 0)
-			n = -ECONNRESET;
-		p9_conn_cancel(m, n);
+		p9_conn_cancel(m, err);
 	}
 
-	if (n & POLLIN) {
+	if (n & EPOLLIN) {
 		set_bit(Rpending, &m->wsched);
 		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		if (!test_and_set_bit(Rworksched, &m->wsched)) {
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
 		}
 	}
 
-	if (n & POLLOUT) {
+	if (n & EPOLLOUT) {
 		set_bit(Wpending, &m->wsched);
 		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
 		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
@@ -663,7 +663,7 @@ static void p9_poll_mux(struct p9_conn *m)
 
 static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int n;
+	__poll_t n;
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = &ts->conn;
 
@@ -678,11 +678,11 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 	spin_unlock(&client->lock);
 
 	if (test_and_clear_bit(Wpending, &m->wsched))
-		n = POLLOUT;
+		n = EPOLLOUT;
 	else
-		n = p9_fd_poll(m->client, NULL);
+		n = p9_fd_poll(m->client, NULL, NULL);
 
-	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+	if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
 		schedule_work(&m->wq);
 
 	return 0;
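
The net/9p conversion above follows the 4.16 __poll_t annotation work: a poll result becomes a distinct, sparse-checkable bitmask built from the EPOLL* constants, so it can no longer double as a negative errno, which is why p9_fd_poll() grows an int *err out-parameter. A minimal sketch of a poll method under this convention (the demo_* names and data-ready flag are hypothetical):

#include <linux/poll.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
static bool demo_data_ready;	/* set by the producer side */

/* __poll_t is a bitmask type; only EPOLL* constants may be or-ed in. */
static __poll_t demo_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(file, &demo_waitq, wait);

	if (demo_data_ready)
		mask |= EPOLLIN | EPOLLRDNORM;	/* readable */
	mask |= EPOLLOUT | EPOLLWRNORM;		/* always writable here */

	return mask;	/* never a negative errno */
}
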
diff --git a/net/Kconfig b/net/Kconfig
index 9dba2715919d..0428f12c25c2 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -182,6 +182,7 @@ config BRIDGE_NETFILTER
 	depends on BRIDGE
 	depends on NETFILTER && INET
 	depends on NETFILTER_ADVANCED
+	select NETFILTER_FAMILY_BRIDGE
 	default m
 	---help---
 	  Enabling this option will let arptables resp. iptables see bridged
@@ -212,7 +213,6 @@ source "net/dsa/Kconfig"
 source "net/8021q/Kconfig"
 source "net/decnet/Kconfig"
 source "net/llc/Kconfig"
-source "net/ipx/Kconfig"
 source "drivers/net/appletalk/Kconfig"
 source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
@@ -336,23 +336,6 @@ config NET_PKTGEN
 	  To compile this code as a module, choose M here: the
 	  module will be called pktgen.
 
-config NET_TCPPROBE
-	tristate "TCP connection probing"
-	depends on INET && PROC_FS && KPROBES
-	---help---
-	  This module allows for capturing the changes to TCP connection
-	  state in response to incoming packets. It is used for debugging
-	  TCP congestion avoidance modules. If you don't understand
-	  what was just said, you don't need it: say N.
-
-	  Documentation on how to use TCP connection probing can be found
-	  at:
-
-	  http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe
-
-	  To compile this code as a module, choose M here: the
-	  module will be called tcp_probe.
-
 config NET_DROP_MONITOR
 	tristate "Network packet drop alerting service"
 	depends on INET && TRACEPOINTS
diff --git a/net/Makefile b/net/Makefile
index 14fede520840..a6147c61b174 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_PACKET) += packet/
 obj-$(CONFIG_NET_KEY)	+= key/
 obj-$(CONFIG_BRIDGE)	+= bridge/
 obj-$(CONFIG_NET_DSA)	+= dsa/
-obj-$(CONFIG_IPX)	+= ipx/
 obj-$(CONFIG_ATALK)	+= appletalk/
 obj-$(CONFIG_X25)	+= x25/
 obj-$(CONFIG_LAPB)	+= lapb/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 309d7dbb36e8..d4c1021e74e1 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -1047,7 +1047,6 @@ static int aarp_seq_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations atalk_seq_arp_fops = {
-	.owner = THIS_MODULE,
 	.open = aarp_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index af46bc49e1e9..a3bf9d519193 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -226,7 +226,6 @@ static int atalk_seq_socket_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations atalk_seq_interface_fops = {
-	.owner = THIS_MODULE,
 	.open = atalk_seq_interface_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -234,7 +233,6 @@ static const struct file_operations atalk_seq_interface_fops = {
 };
 
 static const struct file_operations atalk_seq_route_fops = {
-	.owner = THIS_MODULE,
 	.open = atalk_seq_route_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -242,7 +240,6 @@ static const struct file_operations atalk_seq_route_fops = {
 };
 
 static const struct file_operations atalk_seq_socket_fops = {
-	.owner = THIS_MODULE,
 	.open = atalk_seq_socket_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 4e111196f902..fd94bea36ee8 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -824,7 +824,6 @@ static int br2684_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations br2684_proc_ops = {
-	.owner = THIS_MODULE,
 	.open = br2684_proc_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/atm/common.c b/net/atm/common.c
index 8a4f99114cd2..fc78a0508ae1 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -14,7 +14,7 @@
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/sched/signal.h>
-#include <linux/time.h>		/* struct timeval */
+#include <linux/time64.h>	/* 64-bit time for seconds */
 #include <linux/skbuff.h>
 #include <linux/bitops.h>
 #include <linux/init.h>
@@ -648,11 +648,11 @@ out:
 	return error;
 }
 
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct atm_vcc *vcc;
-	unsigned int mask;
+	__poll_t mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
@@ -661,15 +661,15 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	/* exceptional events */
 	if (sk->sk_err)
-		mask = POLLERR;
+		mask = EPOLLERR;
 
 	if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
 	    test_bit(ATM_VF_CLOSE, &vcc->flags))
-		mask |= POLLHUP;
+		mask |= EPOLLHUP;
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* writable? */
 	if (sock->state == SS_CONNECTING &&
@@ -678,7 +678,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	if (vcc->qos.txtp.traffic_class != ATM_NONE &&
 	    vcc_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 
 	return mask;
 }
diff --git a/net/atm/common.h b/net/atm/common.h
index d9d583712a91..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		int flags);
 int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 6676e3433261..09a1f056712a 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -992,7 +992,6 @@ static int lec_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations lec_seq_fops = {
-	.owner = THIS_MODULE,
 	.open = lec_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 7c6a1cc760a2..31e0dcb970f8 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1089,7 +1089,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 		msg->type = SND_MPOA_RES_RQST;
 		msg->content.in_info = entry->ctrl_info;
 		msg_to_mpoad(msg, mpc);
-		do_gettimeofday(&(entry->reply_wait));
+		entry->reply_wait = ktime_get_seconds();
 		mpc->in_ops->put(entry);
 		return;
 	}
@@ -1099,7 +1099,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 		msg->type = SND_MPOA_RES_RQST;
 		msg->content.in_info = entry->ctrl_info;
 		msg_to_mpoad(msg, mpc);
-		do_gettimeofday(&(entry->reply_wait));
+		entry->reply_wait = ktime_get_seconds();
 		mpc->in_ops->put(entry);
 		return;
 	}
@@ -1175,8 +1175,9 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 	}
 
 	entry->ctrl_info = msg->content.in_info;
-	do_gettimeofday(&(entry->tv));
-	do_gettimeofday(&(entry->reply_wait)); /* Used in refreshing func from now on */
+	entry->time = ktime_get_seconds();
+	/* Used in refreshing func from now on */
+	entry->reply_wait = ktime_get_seconds();
 	entry->refresh_time = 0;
 	ddprintk_cont("entry->shortcut = %p\n", entry->shortcut);
 
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index e01450bb32d6..4bb418313720 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -117,7 +117,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip,
 
 	memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
 	entry->ctrl_info.in_dst_ip = dst_ip;
-	do_gettimeofday(&(entry->tv));
+	entry->time = ktime_get_seconds();
 	entry->retry_time = client->parameters.mpc_p4;
 	entry->count = 1;
 	entry->entry_state = INGRESS_INVALID;
@@ -148,7 +148,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
 		if (qos != NULL)
 			msg.qos = qos->qos;
 		msg_to_mpoad(&msg, mpc);
-		do_gettimeofday(&(entry->reply_wait));
+		entry->reply_wait = ktime_get_seconds();
 		entry->entry_state = INGRESS_RESOLVING;
 	}
 	if (entry->shortcut != NULL)
@@ -171,7 +171,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
 		if (qos != NULL)
 			msg.qos = qos->qos;
 		msg_to_mpoad(&msg, mpc);
-		do_gettimeofday(&(entry->reply_wait));
+		entry->reply_wait = ktime_get_seconds();
 	}
 
 	return CLOSED;
@@ -227,17 +227,16 @@ static void in_cache_remove_entry(in_cache_entry *entry,
 static void clear_count_and_expired(struct mpoa_client *client)
 {
 	in_cache_entry *entry, *next_entry;
-	struct timeval now;
+	time64_t now;
 
-	do_gettimeofday(&now);
+	now = ktime_get_seconds();
 
 	write_lock_bh(&client->ingress_lock);
 	entry = client->in_cache;
 	while (entry != NULL) {
 		entry->count = 0;
 		next_entry = entry->next;
-		if ((now.tv_sec - entry->tv.tv_sec)
-		   > entry->ctrl_info.holding_time) {
+		if ((now - entry->time) > entry->ctrl_info.holding_time) {
 			dprintk("holding time expired, ip = %pI4\n",
 				&entry->ctrl_info.in_dst_ip);
 			client->in_ops->remove_entry(entry, client);
@@ -253,35 +252,35 @@ static void check_resolving_entries(struct mpoa_client *client)
 
 	struct atm_mpoa_qos *qos;
 	in_cache_entry *entry;
-	struct timeval now;
+	time64_t now;
 	struct k_message msg;
 
-	do_gettimeofday(&now);
+	now = ktime_get_seconds();
 
 	read_lock_bh(&client->ingress_lock);
 	entry = client->in_cache;
 	while (entry != NULL) {
 		if (entry->entry_state == INGRESS_RESOLVING) {
-			if ((now.tv_sec - entry->hold_down.tv_sec) <
-			    client->parameters.mpc_p6) {
+
+			if ((now - entry->hold_down)
+			    < client->parameters.mpc_p6) {
 				entry = entry->next;	/* Entry in hold down */
 				continue;
 			}
-			if ((now.tv_sec - entry->reply_wait.tv_sec) >
-			    entry->retry_time) {
+			if ((now - entry->reply_wait) > entry->retry_time) {
 				entry->retry_time = MPC_C1 * (entry->retry_time);
 				/*
 				 * Retry time maximum exceeded,
 				 * put entry in hold down.
 				 */
 				if (entry->retry_time > client->parameters.mpc_p5) {
-					do_gettimeofday(&(entry->hold_down));
+					entry->hold_down = ktime_get_seconds();
 					entry->retry_time = client->parameters.mpc_p4;
 					entry = entry->next;
 					continue;
 				}
 				/* Ask daemon to send a resolution request. */
-				memset(&(entry->hold_down), 0, sizeof(struct timeval));
+				memset(&entry->hold_down, 0, sizeof(time64_t));
 				msg.type = SND_MPOA_RES_RTRY;
 				memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN);
 				msg.content.in_info = entry->ctrl_info;
@@ -289,7 +288,7 @@ static void check_resolving_entries(struct mpoa_client *client)
 				if (qos != NULL)
 					msg.qos = qos->qos;
 				msg_to_mpoad(&msg, client);
-				do_gettimeofday(&(entry->reply_wait));
+				entry->reply_wait = ktime_get_seconds();
 			}
 		}
 		entry = entry->next;
@@ -300,18 +299,18 @@ static void check_resolving_entries(struct mpoa_client *client)
 /* Call this every MPC-p5 seconds. */
 static void refresh_entries(struct mpoa_client *client)
 {
-	struct timeval now;
+	time64_t now;
 	struct in_cache_entry *entry = client->in_cache;
 
 	ddprintk("refresh_entries\n");
-	do_gettimeofday(&now);
+	now = ktime_get_seconds();
 
 	read_lock_bh(&client->ingress_lock);
 	while (entry != NULL) {
 		if (entry->entry_state == INGRESS_RESOLVED) {
 			if (!(entry->refresh_time))
 				entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3;
-			if ((now.tv_sec - entry->reply_wait.tv_sec) >
+			if ((now - entry->reply_wait) >
 			    entry->refresh_time) {
 				dprintk("refreshing an entry.\n");
 				entry->entry_state = INGRESS_REFRESHING;
@@ -480,7 +479,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
 
 	memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
 	entry->ctrl_info = msg->content.eg_info;
-	do_gettimeofday(&(entry->tv));
+	entry->time = ktime_get_seconds();
 	entry->entry_state = EGRESS_RESOLVED;
 	dprintk("new_eg_cache_entry cache_id %u\n",
 		ntohl(entry->ctrl_info.cache_id));
@@ -495,7 +494,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
 
 static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
 {
-	do_gettimeofday(&(entry->tv));
+	entry->time = ktime_get_seconds();
 	entry->entry_state = EGRESS_RESOLVED;
 	entry->ctrl_info.holding_time = holding_time;
 }
@@ -503,17 +502,16 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
 static void clear_expired(struct mpoa_client *client)
 {
 	eg_cache_entry *entry, *next_entry;
-	struct timeval now;
+	time64_t now;
 	struct k_message msg;
 
-	do_gettimeofday(&now);
+	now = ktime_get_seconds();
 
 	write_lock_irq(&client->egress_lock);
 	entry = client->eg_cache;
 	while (entry != NULL) {
 		next_entry = entry->next;
-		if ((now.tv_sec - entry->tv.tv_sec)
-		   > entry->ctrl_info.holding_time) {
+		if ((now - entry->time) > entry->ctrl_info.holding_time) {
 			msg.type = SND_EGRESS_PURGE;
 			msg.content.eg_info = entry->ctrl_info;
 			dprintk("egress_cache: holding time expired, cache_id = %u.\n",
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 6a266669ebf4..464c4c7f8d1f 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -2,6 +2,7 @@
 #ifndef MPOA_CACHES_H
 #define MPOA_CACHES_H
 
+#include <linux/time64.h>
 #include <linux/netdevice.h>
 #include <linux/types.h>
 #include <linux/atm.h>
@@ -16,9 +17,9 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc);
 typedef struct in_cache_entry {
 	struct in_cache_entry *next;
 	struct in_cache_entry *prev;
-	struct timeval tv;
-	struct timeval reply_wait;
-	struct timeval hold_down;
+	time64_t time;
+	time64_t reply_wait;
+	time64_t hold_down;
 	uint32_t packets_fwded;
 	uint16_t entry_state;
 	uint32_t retry_time;
@@ -53,7 +54,7 @@ struct in_cache_ops{
 typedef struct eg_cache_entry{
 	struct eg_cache_entry *next;
 	struct eg_cache_entry *prev;
-	struct timeval tv;
+	time64_t time;
 	uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN];
 	struct atm_vcc *shortcut;
 	uint32_t packets_rcvd;
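
The mpoa_caches and mpoa_proc hunks swap struct timeval plus do_gettimeofday() for time64_t plus ktime_get_seconds(): the holding-time logic only needs second granularity, the monotonic clock cannot jump when wall time is set, and the 64-bit type survives 2038. A minimal sketch of the before/after idiom (the demo_* names are hypothetical):

#include <linux/ktime.h>
#include <linux/time64.h>

struct demo_entry {
	time64_t stamp;			/* was: struct timeval tv */
	unsigned int holding_time;	/* lifetime in seconds */
};

static void demo_touch(struct demo_entry *e)
{
	/* Monotonic seconds: immune to wall-clock jumps, 64 bits wide. */
	e->stamp = ktime_get_seconds();
}

static bool demo_expired(const struct demo_entry *e)
{
	time64_t now = ktime_get_seconds();

	return (now - e->stamp) > e->holding_time;
}
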
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 8a0c17e1c203..b93cc0f18292 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -8,7 +8,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
-#include <linux/time.h>
+#include <linux/ktime.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/atmmpc.h>
@@ -57,7 +57,6 @@ static int parse_qos(const char *buff);
  * Define allowed FILE OPERATIONS
  */
 static const struct file_operations mpc_file_operations = {
-	.owner = THIS_MODULE,
 	.open = proc_mpc_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -138,7 +137,7 @@ static int mpc_show(struct seq_file *m, void *v)
 	int i;
 	in_cache_entry *in_entry;
 	eg_cache_entry *eg_entry;
-	struct timeval now;
+	time64_t now;
 	unsigned char ip_string[16];
 
 	if (v == SEQ_START_TOKEN) {
@@ -148,15 +147,17 @@ static int mpc_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "\nInterface %d:\n\n", mpc->dev_num);
 	seq_printf(m, "Ingress Entries:\nIP address State Holding time Packets fwded VPI VCI\n");
-	do_gettimeofday(&now);
+	now = ktime_get_seconds();
 
 	for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) {
+		unsigned long seconds_delta = now - in_entry->time;
+
 		sprintf(ip_string, "%pI4", &in_entry->ctrl_info.in_dst_ip);
 		seq_printf(m, "%-16s%s%-14lu%-12u",
 			   ip_string,
 			   ingress_state_string(in_entry->entry_state),
 			   in_entry->ctrl_info.holding_time -
-			   (now.tv_sec-in_entry->tv.tv_sec),
+			   seconds_delta,
 			   in_entry->packets_fwded);
 		if (in_entry->shortcut)
 			seq_printf(m, " %-3d %-3d",
@@ -169,13 +170,14 @@ static int mpc_show(struct seq_file *m, void *v)
 	seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id State Holding time Packets recvd Latest IP addr VPI VCI\n");
 	for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) {
 		unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr;
+		unsigned long seconds_delta = now - eg_entry->time;
+
 		for (i = 0; i < ATM_ESA_LEN; i++)
 			seq_printf(m, "%02x", p[i]);
 		seq_printf(m, "\n%-16lu%s%-14lu%-15u",
 			   (unsigned long)ntohl(eg_entry->ctrl_info.cache_id),
 			   egress_state_string(eg_entry->entry_state),
-			   (eg_entry->ctrl_info.holding_time -
-			   (now.tv_sec-eg_entry->tv.tv_sec)),
+			   (eg_entry->ctrl_info.holding_time - seconds_delta),
 			   eg_entry->packets_rcvd);
 
 		/* latest IP address */
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 642f9272ab95..edc48edc95c1 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -37,7 +37,6 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *pos);
 
 static const struct file_operations proc_atm_dev_ops = {
-	.owner = THIS_MODULE,
 	.read = proc_dev_atm_read,
 	.llseek = noop_llseek,
 };
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 06eac1f50c5e..47fdd399626b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1931,7 +1931,6 @@ static int ax25_info_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ax25_info_fops = {
-	.owner = THIS_MODULE,
 	.open = ax25_info_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 0446b892618a..525558972fd9 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -336,7 +336,6 @@ static int ax25_rt_info_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations ax25_route_fops = {
-	.owner = THIS_MODULE,
 	.open = ax25_rt_info_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 83b035f56202..4ebe91ba317a 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -194,7 +194,6 @@ static int ax25_uid_info_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations ax25_uid_fops = {
-	.owner = THIS_MODULE,
 	.open = ax25_uid_info_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index b73b96a2854b..c44f6515be5e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,3 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+#
+# Marek Lindner, Simon Wunderlich
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
 #
 # B.A.T.M.A.N meshing protocol
 #
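[Editor's note] The same SPDX-License-Identifier line is added across the batman-adv tree in the hunks below; only the comment syntax differs by file type, following the kernel's licensing-rules convention of the time. As a quick reference:

# SPDX-License-Identifier: GPL-2.0        Kconfig and Makefile ('#' comments)
// SPDX-License-Identifier: GPL-2.0       .c sources (C++-style, first line)
/* SPDX-License-Identifier: GPL-2.0 */    .h headers (C-style, first line)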
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 915987bc6d29..022f6e77307b 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,4 +1,4 @@
-#
+# SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 44fd073b7546..80c72c7d3cad 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -37,7 +38,8 @@ char batadv_routing_algo[20] = "BATMAN_IV";
 static struct hlist_head batadv_algo_list;
 
 /**
- * batadv_algo_init - Initialize batman-adv algorithm management data structures
+ * batadv_algo_init() - Initialize batman-adv algorithm management data
+ * structures
  */
 void batadv_algo_init(void)
 {
@@ -59,6 +61,12 @@ static struct batadv_algo_ops *batadv_algo_get(char *name)
 	return bat_algo_ops;
 }
 
+/**
+ * batadv_algo_register() - Register callbacks for a mesh algorithm
+ * @bat_algo_ops: mesh algorithm callbacks to add
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
 {
 	struct batadv_algo_ops *bat_algo_ops_tmp;
@@ -88,6 +96,19 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
 	return 0;
 }
 
+/**
+ * batadv_algo_select() - Select algorithm of soft interface
+ * @bat_priv: the bat priv with all the soft interface information
+ * @name: name of the algorithm to select
+ *
+ * The algorithm callbacks for the soft interface will be set when the algorithm
+ * with the correct name was found. Any previous selected algorithm will not be
+ * deinitialized and the new selected algorithm will also not be initialized.
+ * It is therefore not allowed to call batadv_algo_select outside the creation
+ * function of the soft interface.
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
 {
 	struct batadv_algo_ops *bat_algo_ops;
@@ -102,6 +123,14 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
 }
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+/**
+ * batadv_algo_seq_print_text() - Print the supported algorithms in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct batadv_algo_ops *bat_algo_ops;
@@ -148,7 +177,7 @@ module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
 		0644);
 
 /**
- * batadv_algo_dump_entry - fill in information about one supported routing
+ * batadv_algo_dump_entry() - fill in information about one supported routing
  * algorithm
  * @msg: netlink message to be sent back
  * @portid: Port to reply to
@@ -179,7 +208,7 @@ static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_algo_dump - fill in information about supported routing
+ * batadv_algo_dump() - fill in information about supported routing
  * algorithms
  * @msg: netlink message to be sent back
  * @cb: Parameters to the netlink request
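[Editor's note] Every kernel-doc hunk in this file, and in the batman-adv files below, applies one mechanical rule: a function name in a kernel-doc header gains trailing parentheses, which lets scripts/kernel-doc and the Sphinx build mark it up as a function cross-reference. A before/after sketch with a hypothetical function:

/**
 * foo_frob - frobnicate a foo          (old style: bare name)
 * @foo: the foo to frobnicate
 */

/**
 * foo_frob() - frobnicate a foo        (new style: name with parentheses)
 * @foo: the foo to frobnicate
 *
 * Return: 0 on success or a negative error number
 */
int foo_frob(struct foo *foo);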
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 29f6312f9bf1..029221615ba3 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index bbe8414b6ee7..79e326383726 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -26,7 +27,7 @@
 #include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
@@ -51,6 +52,7 @@
 #include <linux/workqueue.h>
 #include <net/genetlink.h>
 #include <net/netlink.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
@@ -62,7 +64,6 @@
 #include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
-#include "packet.h"
 #include "routing.h"
 #include "send.h"
 #include "translation-table.h"
@@ -72,21 +73,28 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work);
 
 /**
  * enum batadv_dup_status - duplicate status
- * @BATADV_NO_DUP: the packet is no duplicate
- * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
- *  neighbor)
- * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
- * @BATADV_PROTECTED: originator is currently protected (after reboot)
  */
 enum batadv_dup_status {
+	/** @BATADV_NO_DUP: the packet is no duplicate */
 	BATADV_NO_DUP = 0,
+
+	/**
+	 * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for
+	 *  the neighbor)
+	 */
 	BATADV_ORIG_DUP,
+
+	/** @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor */
 	BATADV_NEIGH_DUP,
+
+	/**
+	 * @BATADV_PROTECTED: originator is currently protected (after reboot)
+	 */
 	BATADV_PROTECTED,
 };
 
 /**
- * batadv_ring_buffer_set - update the ring buffer with the given value
+ * batadv_ring_buffer_set() - update the ring buffer with the given value
  * @lq_recv: pointer to the ring buffer
  * @lq_index: index to store the value at
  * @value: value to store in the ring buffer
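[Editor's note] The enum rework above moves from one @-list in the enum header to an inline kernel-doc comment per enumerator, the form the documentation toolchain now expects. A compact sketch with a hypothetical enum:

/**
 * enum foo_state - connection state of a hypothetical foo
 */
enum foo_state {
	/** @FOO_IDLE: nothing in flight */
	FOO_IDLE = 0,

	/**
	 * @FOO_BUSY: a request is outstanding (multi-line member
	 * comments use the block form)
	 */
	FOO_BUSY,
};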
@@ -98,7 +106,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value)
 }
 
 /**
- * batadv_ring_buffer_avg - compute the average of all non-zero values stored
+ * batadv_ring_buffer_avg() - compute the average of all non-zero values stored
  * in the given ring buffer
  * @lq_recv: pointer to the ring buffer
 *
@@ -130,7 +138,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
 }
 
 /**
- * batadv_iv_ogm_orig_free - free the private resources allocated for this
+ * batadv_iv_ogm_orig_free() - free the private resources allocated for this
  * orig_node
  * @orig_node: the orig_node for which the resources have to be free'd
  */
@@ -141,8 +149,8 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
 }
 
 /**
- * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to
- * include the new hard-interface
+ * batadv_iv_ogm_orig_add_if() - change the private structures of the orig_node
+ * to include the new hard-interface
  * @orig_node: the orig_node that has to be changed
  * @max_if_num: the current amount of interfaces
  *
@@ -186,7 +194,7 @@ unlock:
 }
 
 /**
- * batadv_iv_ogm_drop_bcast_own_entry - drop section of bcast_own
+ * batadv_iv_ogm_drop_bcast_own_entry() - drop section of bcast_own
  * @orig_node: the orig_node that has to be changed
  * @max_if_num: the current amount of interfaces
  * @del_if_num: the index of the interface being removed
@@ -224,7 +232,7 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_iv_ogm_drop_bcast_own_sum_entry - drop section of bcast_own_sum
+ * batadv_iv_ogm_drop_bcast_own_sum_entry() - drop section of bcast_own_sum
  * @orig_node: the orig_node that has to be changed
  * @max_if_num: the current amount of interfaces
  * @del_if_num: the index of the interface being removed
@@ -259,8 +267,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
- * exclude the removed interface
+ * batadv_iv_ogm_orig_del_if() - change the private structures of the orig_node
+ * to exclude the removed interface
  * @orig_node: the orig_node that has to be changed
  * @max_if_num: the current amount of interfaces
  * @del_if_num: the index of the interface being removed
@@ -290,7 +298,8 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_iv_ogm_orig_get - retrieve or create (if does not exist) an originator
+ * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an
+ * originator
  * @bat_priv: the bat priv with all the soft interface information
 * @addr: mac address of the originator
 *
@@ -447,7 +456,7 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached
+ * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached
  * @buff_pos: current position in the skb
  * @packet_len: total length of the skb
  * @tvlv_len: tvlv length of the previously considered OGM
@@ -557,7 +566,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
 }
 
 /**
- * batadv_iv_ogm_can_aggregate - find out if an OGM can be aggregated on an
+ * batadv_iv_ogm_can_aggregate() - find out if an OGM can be aggregated on an
  * existing forward packet
  * @new_bat_ogm_packet: OGM packet to be aggregated
  * @bat_priv: the bat priv with all the soft interface information
@@ -660,7 +669,7 @@ out:
 }
 
 /**
- * batadv_iv_ogm_aggregate_new - create a new aggregated packet and add this
+ * batadv_iv_ogm_aggregate_new() - create a new aggregated packet and add this
  * packet to it.
  * @packet_buff: pointer to the OGM
  * @packet_len: (total) length of the OGM
@@ -743,7 +752,7 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
 }
 
 /**
- * batadv_iv_ogm_queue_add - queue up an OGM for transmission
+ * batadv_iv_ogm_queue_add() - queue up an OGM for transmission
  * @bat_priv: the bat priv with all the soft interface information
  * @packet_buff: pointer to the OGM
  * @packet_len: (total) length of the OGM
@@ -869,8 +878,8 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for
- * the given interface
+ * batadv_iv_ogm_slide_own_bcast_window() - bitshift own OGM broadcast windows
+ * for the given interface
  * @hard_iface: the interface for which the windows have to be shifted
  */
 static void
@@ -987,7 +996,7 @@ out:
 }
 
 /**
- * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an
+ * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an
  * originator
  * @bat_priv: the bat priv with all the soft interface information
  * @orig_node: the orig node who originally emitted the ogm packet
@@ -1152,7 +1161,7 @@ out:
 }
 
 /**
- * batadv_iv_ogm_calc_tq - calculate tq for current received ogm packet
+ * batadv_iv_ogm_calc_tq() - calculate tq for current received ogm packet
  * @orig_node: the orig node who originally emitted the ogm packet
  * @orig_neigh_node: the orig node struct of the neighbor who sent the packet
  * @batadv_ogm_packet: the ogm packet
@@ -1298,7 +1307,7 @@ out:
 }
 
 /**
- * batadv_iv_ogm_update_seqnos - process a batman packet for all interfaces,
+ * batadv_iv_ogm_update_seqnos() - process a batman packet for all interfaces,
  * adjust the sequence number and find out whether it is a duplicate
  * @ethhdr: ethernet header of the packet
  * @batadv_ogm_packet: OGM packet to be considered
@@ -1401,7 +1410,8 @@ out:
 }
 
 /**
- * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if
+ * batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing
+ * interface
  * @skb: the skb containing the OGM
  * @ogm_offset: offset from skb->data to start of ogm header
  * @orig_node: the (cached) orig node for the originator of this OGM
@@ -1608,7 +1618,7 @@ out:
 }
 
 /**
- * batadv_iv_ogm_process - process an incoming batman iv OGM
+ * batadv_iv_ogm_process() - process an incoming batman iv OGM
  * @skb: the skb containing the OGM
  * @ogm_offset: offset to the OGM which should be processed (for aggregates)
  * @if_incoming: the interface where this packet was receved
@@ -1861,7 +1871,7 @@ free_skb:
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table
+ * batadv_iv_ogm_orig_print_neigh() - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
  * @if_outgoing: outgoing interface for these entries
  * @seq: debugfs table seq_file struct
@@ -1890,7 +1900,7 @@ batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_iv_ogm_orig_print - print the originator table
+ * batadv_iv_ogm_orig_print() - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: debugfs table seq_file struct
  * @if_outgoing: the outgoing interface for which this should be printed
@@ -1960,7 +1970,7 @@ next:
 #endif
 
 /**
- * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ * batadv_iv_ogm_neigh_get_tq_avg() - Get the TQ average for a neighbour on a
  * given outgoing interface.
  * @neigh_node: Neighbour of interest
  * @if_outgoing: Outgoing interface of interest
@@ -1986,7 +1996,7 @@ batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
 }
 
 /**
- * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ * batadv_iv_ogm_orig_dump_subentry() - Dump an originator subentry into a
  * message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
@@ -2048,7 +2058,7 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * batadv_iv_ogm_orig_dump_entry() - Dump an originator entry into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -2110,7 +2120,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ * batadv_iv_ogm_orig_dump_bucket() - Dump an originator bucket into a
  * message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
@@ -2153,7 +2163,7 @@ batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * batadv_iv_ogm_orig_dump() - Dump the originators into a message
  * @msg: Netlink message to dump into
  * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
@@ -2190,7 +2200,7 @@ batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_iv_hardif_neigh_print - print a single hop neighbour node
+ * batadv_iv_hardif_neigh_print() - print a single hop neighbour node
  * @seq: neighbour table seq_file struct
  * @hardif_neigh: hardif neighbour information
  */
@@ -2209,7 +2219,7 @@ batadv_iv_hardif_neigh_print(struct seq_file *seq,
 }
 
 /**
- * batadv_iv_ogm_neigh_print - print the single hop neighbour list
+ * batadv_iv_ogm_neigh_print() - print the single hop neighbour list
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: neighbour table seq_file struct
  */
@@ -2242,7 +2252,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
 #endif
 
 /**
- * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
+ * batadv_iv_ogm_neigh_diff() - calculate tq difference of two neighbors
  * @neigh1: the first neighbor object of the comparison
  * @if_outgoing1: outgoing interface for the first neighbor
  * @neigh2: the second neighbor object of the comparison
@@ -2287,7 +2297,7 @@ out:
 }
 
 /**
- * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * batadv_iv_ogm_neigh_dump_neigh() - Dump a neighbour into a netlink message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -2326,7 +2336,7 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ * batadv_iv_ogm_neigh_dump_hardif() - Dump the neighbours of a hard interface
  * into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
@@ -2365,7 +2375,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * batadv_iv_ogm_neigh_dump() - Dump the neighbours into a message
  * @msg: Netlink message to dump into
  * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
@@ -2417,7 +2427,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
 }
 
 /**
- * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * batadv_iv_ogm_neigh_cmp() - compare the metrics of two neighbors
  * @neigh1: the first neighbor object of the comparison
  * @if_outgoing1: outgoing interface for the first neighbor
  * @neigh2: the second neighbor object of the comparison
@@ -2443,7 +2453,7 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
 }
 
 /**
- * batadv_iv_ogm_neigh_is_sob - check if neigh1 is similarly good or better
+ * batadv_iv_ogm_neigh_is_sob() - check if neigh1 is similarly good or better
  * than neigh2 from the metric prospective
  * @neigh1: the first neighbor object of the comparison
  * @if_outgoing1: outgoing interface for the first neighbor
@@ -2478,7 +2488,7 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
 }
 
 /**
- * batadv_iv_init_sel_class - initialize GW selection class
+ * batadv_iv_init_sel_class() - initialize GW selection class
  * @bat_priv: the bat priv with all the soft interface information
  */
 static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv)
@@ -2703,7 +2713,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
 #endif
 
 /**
- * batadv_iv_gw_dump_entry - Dump a gateway into a message
+ * batadv_iv_gw_dump_entry() - Dump a gateway into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -2774,7 +2784,7 @@ out:
 }
 
 /**
- * batadv_iv_gw_dump - Dump gateways into a message
+ * batadv_iv_gw_dump() - Dump gateways into a message
  * @msg: Netlink message to dump into
  * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
@@ -2843,6 +2853,11 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	},
 };
 
+/**
+ * batadv_iv_init() - B.A.T.M.A.N. IV initialization function
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int __init batadv_iv_init(void)
 {
 	int ret;
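[Editor's note] The newly documented batadv_iv_init() is the template for how a routing algorithm enters the framework: fill in a struct batadv_algo_ops and pass it to batadv_algo_register() at module init. A compressed, hypothetical sketch (the real ops table fills many more callback groups):

/* "MY_ALGO" and my_algo_init are illustrative, not from the patch. */
static struct batadv_algo_ops my_algo __read_mostly = {
	.name = "MY_ALGO",
	/* .iface, .neigh, .orig and .gw callback groups go here */
};

int __init my_algo_init(void)
{
	/* Return: 0 on success or a negative error number, as documented */
	return batadv_algo_register(&my_algo);
}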
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index ae2ab526bdb1..9dc0dd5c83df 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e0e2bfcd6b3e..27e165ac9302 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
@@ -36,6 +37,7 @@
 #include <linux/workqueue.h>
 #include <net/genetlink.h>
 #include <net/netlink.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
@@ -48,7 +50,6 @@
 #include "log.h"
 #include "netlink.h"
 #include "originator.h"
-#include "packet.h"
 
 struct sk_buff;
 
@@ -99,7 +100,7 @@ static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface)
 }
 
 /**
- * batadv_v_iface_update_mac - react to hard-interface MAC address change
+ * batadv_v_iface_update_mac() - react to hard-interface MAC address change
  * @hard_iface: the modified interface
  *
  * If the modified interface is the primary one, update the originator
@@ -130,7 +131,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_v_orig_print_neigh - print neighbors for the originator table
+ * batadv_v_orig_print_neigh() - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
  * @if_outgoing: outgoing interface for these entries
  * @seq: debugfs table seq_file struct
@@ -160,7 +161,7 @@ batadv_v_orig_print_neigh(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_v_hardif_neigh_print - print a single ELP neighbour node
+ * batadv_v_hardif_neigh_print() - print a single ELP neighbour node
  * @seq: neighbour table seq_file struct
  * @hardif_neigh: hardif neighbour information
  */
@@ -181,7 +182,7 @@ batadv_v_hardif_neigh_print(struct seq_file *seq,
 }
 
 /**
- * batadv_v_neigh_print - print the single hop neighbour list
+ * batadv_v_neigh_print() - print the single hop neighbour list
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: neighbour table seq_file struct
  */
@@ -215,7 +216,7 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
 #endif
 
 /**
- * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * batadv_v_neigh_dump_neigh() - Dump a neighbour into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -258,7 +259,7 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into
+ * batadv_v_neigh_dump_hardif() - Dump the neighbours of a hard interface into
  * a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
@@ -296,7 +297,7 @@ batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a
+ * batadv_v_neigh_dump() - Dump the neighbours of a hard interface into a
  * message
  * @msg: Netlink message to dump into
  * @cb: Control block containing additional options
@@ -348,7 +349,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_v_orig_print - print the originator table
+ * batadv_v_orig_print() - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: debugfs table seq_file struct
  * @if_outgoing: the outgoing interface for which this should be printed
@@ -416,8 +417,7 @@ next:
 #endif
 
 /**
- * batadv_v_orig_dump_subentry - Dump an originator subentry into a
- * message
+ * batadv_v_orig_dump_subentry() - Dump an originator subentry into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -483,7 +483,7 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * batadv_v_orig_dump_entry() - Dump an originator entry into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -536,8 +536,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_v_orig_dump_bucket - Dump an originator bucket into a
- * message
+ * batadv_v_orig_dump_bucket() - Dump an originator bucket into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -578,7 +577,7 @@ batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_v_orig_dump - Dump the originators into a message
+ * batadv_v_orig_dump() - Dump the originators into a message
  * @msg: Netlink message to dump into
  * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
@@ -668,7 +667,7 @@ err_ifinfo1:
 }
 
 /**
- * batadv_v_init_sel_class - initialize GW selection class
+ * batadv_v_init_sel_class() - initialize GW selection class
  * @bat_priv: the bat priv with all the soft interface information
  */
 static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
@@ -704,7 +703,7 @@ static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff)
 }
 
 /**
- * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW
+ * batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW
  * @gw_node: the GW to retrieve the metric for
  * @bw: the pointer where the metric will be stored. The metric is computed as
  * the minimum between the GW advertised throughput and the path throughput to
@@ -747,7 +746,7 @@ out:
 }
 
 /**
- * batadv_v_gw_get_best_gw_node - retrieve the best GW node
+ * batadv_v_gw_get_best_gw_node() - retrieve the best GW node
  * @bat_priv: the bat priv with all the soft interface information
 *
 * Return: the GW node having the best GW-metric, NULL if no GW is known
@@ -785,7 +784,7 @@ next:
 }
 
 /**
- * batadv_v_gw_is_eligible - check if a originator would be selected as GW
+ * batadv_v_gw_is_eligible() - check if a originator would be selected as GW
  * @bat_priv: the bat priv with all the soft interface information
  * @curr_gw_orig: originator representing the currently selected GW
  * @orig_node: the originator representing the new candidate
@@ -884,7 +883,7 @@ out:
 }
 
 /**
- * batadv_v_gw_print - print the gateway list
+ * batadv_v_gw_print() - print the gateway list
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: gateway table seq_file struct
  */
@@ -913,7 +912,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
 #endif
 
 /**
- * batadv_v_gw_dump_entry - Dump a gateway into a message
+ * batadv_v_gw_dump_entry() - Dump a gateway into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -1004,7 +1003,7 @@ out:
 }
 
 /**
- * batadv_v_gw_dump - Dump gateways into a message
+ * batadv_v_gw_dump() - Dump gateways into a message
  * @msg: Netlink message to dump into
  * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
@@ -1074,7 +1073,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 };
 
 /**
- * batadv_v_hardif_init - initialize the algorithm specific fields in the
+ * batadv_v_hardif_init() - initialize the algorithm specific fields in the
  * hard-interface object
  * @hard_iface: the hard-interface to initialize
  */
@@ -1088,7 +1087,7 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
 }
 
 /**
- * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a
+ * batadv_v_mesh_init() - initialize the B.A.T.M.A.N. V private resources for a
  * mesh
  * @bat_priv: the object representing the mesh interface to initialise
  *
@@ -1106,7 +1105,7 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_v_mesh_free - free the B.A.T.M.A.N. V private resources for a mesh
+ * batadv_v_mesh_free() - free the B.A.T.M.A.N. V private resources for a mesh
  * @bat_priv: the object representing the mesh interface to free
  */
 void batadv_v_mesh_free(struct batadv_priv *bat_priv)
@@ -1115,7 +1114,7 @@ void batadv_v_mesh_free(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_v_init - B.A.T.M.A.N. V initialization function
+ * batadv_v_init() - B.A.T.M.A.N. V initialization function
  *
  * Description: Takes care of initializing all the subcomponents.
  * It is invoked upon module load only.
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index dd7c4b647e6b..a17ab68bbce8 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 1de992c58b35..a83478c46597 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
@@ -24,7 +25,7 @@
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
@@ -41,18 +42,18 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <net/cfg80211.h>
+#include <uapi/linux/batadv_packet.h>
 
 #include "bat_algo.h"
 #include "bat_v_ogm.h"
 #include "hard-interface.h"
 #include "log.h"
 #include "originator.h"
-#include "packet.h"
 #include "routing.h"
 #include "send.h"
 
 /**
- * batadv_v_elp_start_timer - restart timer for ELP periodic work
+ * batadv_v_elp_start_timer() - restart timer for ELP periodic work
  * @hard_iface: the interface for which the timer has to be reset
  */
 static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
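[Editor's note] batadv_v_elp_start_timer(), whose kernel-doc is touched at the end of the hunk above, re-arms a delayed work item rather than a raw kernel timer. A self-contained sketch of that generic pattern, with hypothetical names and a one-second interval:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

/* Hypothetical periodic worker that re-arms itself every second. */
static struct delayed_work example_work;

static void example_work_fn(struct work_struct *work)
{
	/* ... do the periodic, ELP-style job here ... */
	queue_delayed_work(system_wq, &example_work, msecs_to_jiffies(1000));
}

static void example_start_timer(void)
{
	INIT_DELAYED_WORK(&example_work, example_work_fn);
	queue_delayed_work(system_wq, &example_work, msecs_to_jiffies(1000));
}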
@@ -67,7 +68,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
 }
 
 /**
- * batadv_v_elp_get_throughput - get the throughput towards a neighbour
+ * batadv_v_elp_get_throughput() - get the throughput towards a neighbour
  * @neigh: the neighbour for which the throughput has to be obtained
  *
  * Return: The throughput towards the given neighbour in multiples of 100kpbs
@@ -153,8 +154,8 @@ default_throughput:
 }
 
 /**
- * batadv_v_elp_throughput_metric_update - worker updating the throughput metric
- * of a single hop neighbour
+ * batadv_v_elp_throughput_metric_update() - worker updating the throughput
+ * metric of a single hop neighbour
  * @work: the work queue item
  */
 void batadv_v_elp_throughput_metric_update(struct work_struct *work)
@@ -177,7 +178,7 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work)
 }
 
 /**
- * batadv_v_elp_wifi_neigh_probe - send link probing packets to a neighbour
+ * batadv_v_elp_wifi_neigh_probe() - send link probing packets to a neighbour
  * @neigh: the neighbour to probe
  *
  * Sends a predefined number of unicast wifi packets to a given neighbour in
@@ -240,7 +241,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
 }
 
 /**
- * batadv_v_elp_periodic_work - ELP periodic task per interface
+ * batadv_v_elp_periodic_work() - ELP periodic task per interface
  * @work: work queue item
  *
  * Emits broadcast ELP message in regular intervals.
@@ -327,7 +328,7 @@ out:
 }
 
 /**
- * batadv_v_elp_iface_enable - setup the ELP interface private resources
+ * batadv_v_elp_iface_enable() - setup the ELP interface private resources
  * @hard_iface: interface for which the data has to be prepared
 *
 * Return: 0 on success or a -ENOMEM in case of failure.
@@ -375,7 +376,7 @@ out:
 }
 
 /**
- * batadv_v_elp_iface_disable - release ELP interface private resources
+ * batadv_v_elp_iface_disable() - release ELP interface private resources
  * @hard_iface: interface for which the resources have to be released
  */
 void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface)
@@ -387,7 +388,7 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface)
 }
 
 /**
- * batadv_v_elp_iface_activate - update the ELP buffer belonging to the given
+ * batadv_v_elp_iface_activate() - update the ELP buffer belonging to the given
  * hard-interface
  * @primary_iface: the new primary interface
  * @hard_iface: interface holding the to-be-updated buffer
@@ -408,7 +409,7 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
 }
 
 /**
- * batadv_v_elp_primary_iface_set - change internal data to reflect the new
+ * batadv_v_elp_primary_iface_set() - change internal data to reflect the new
  * primary interface
  * @primary_iface: the new primary interface
  */
@@ -428,7 +429,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface)
 }
 
 /**
- * batadv_v_elp_neigh_update - update an ELP neighbour node
+ * batadv_v_elp_neigh_update() - update an ELP neighbour node
  * @bat_priv: the bat priv with all the soft interface information
  * @neigh_addr: the neighbour interface address
  * @if_incoming: the interface the packet was received through
@@ -488,7 +489,7 @@ orig_free:
 }
 
 /**
- * batadv_v_elp_packet_recv - main ELP packet handler
+ * batadv_v_elp_packet_recv() - main ELP packet handler
  * @skb: the received packet
  * @if_incoming: the interface this packet was received through
 *
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 376ead280ab9..5e39d0588a48 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index c251445a42a0..ba59b77c605d 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Antonio Quartulli 4 * Antonio Quartulli
@@ -22,7 +23,7 @@
22#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
25#include <linux/fs.h> 26#include <linux/gfp.h>
26#include <linux/if_ether.h> 27#include <linux/if_ether.h>
27#include <linux/jiffies.h> 28#include <linux/jiffies.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
@@ -38,20 +39,20 @@
38#include <linux/string.h> 39#include <linux/string.h>
39#include <linux/types.h> 40#include <linux/types.h>
40#include <linux/workqueue.h> 41#include <linux/workqueue.h>
42#include <uapi/linux/batadv_packet.h>
41 43
42#include "bat_algo.h" 44#include "bat_algo.h"
43#include "hard-interface.h" 45#include "hard-interface.h"
44#include "hash.h" 46#include "hash.h"
45#include "log.h" 47#include "log.h"
46#include "originator.h" 48#include "originator.h"
47#include "packet.h"
48#include "routing.h" 49#include "routing.h"
49#include "send.h" 50#include "send.h"
50#include "translation-table.h" 51#include "translation-table.h"
51#include "tvlv.h" 52#include "tvlv.h"
52 53
53/** 54/**
54 * batadv_v_ogm_orig_get - retrieve and possibly create an originator node 55 * batadv_v_ogm_orig_get() - retrieve and possibly create an originator node
55 * @bat_priv: the bat priv with all the soft interface information 56 * @bat_priv: the bat priv with all the soft interface information
56 * @addr: the address of the originator 57 * @addr: the address of the originator
57 * 58 *
@@ -88,7 +89,7 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
88} 89}
89 90
90/** 91/**
91 * batadv_v_ogm_start_timer - restart the OGM sending timer 92 * batadv_v_ogm_start_timer() - restart the OGM sending timer
92 * @bat_priv: the bat priv with all the soft interface information 93 * @bat_priv: the bat priv with all the soft interface information
93 */ 94 */
94static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) 95static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
@@ -107,7 +108,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_v_ogm_send_to_if - send a batman ogm using a given interface
+ * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface
  * @skb: the OGM to send
  * @hard_iface: the interface to use to send the OGM
  */
@@ -127,7 +128,7 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
 }
 
 /**
- * batadv_v_ogm_send - periodic worker broadcasting the own OGM
+ * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
  * @work: work queue item
  */
 static void batadv_v_ogm_send(struct work_struct *work)
@@ -235,7 +236,7 @@ out:
 }
 
 /**
- * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V
+ * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V
  * @hard_iface: the interface to prepare
  *
  * Takes care of scheduling own OGM sending routine for this interface.
@@ -252,7 +253,7 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
 }
 
 /**
- * batadv_v_ogm_primary_iface_set - set a new primary interface
+ * batadv_v_ogm_primary_iface_set() - set a new primary interface
  * @primary_iface: the new primary interface
  */
 void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
@@ -268,8 +269,8 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
 }
 
 /**
- * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded
- * with B.A.T.M.A.N. V OGMs
+ * batadv_v_forward_penalty() - apply a penalty to the throughput metric
+ * forwarded with B.A.T.M.A.N. V OGMs
  * @bat_priv: the bat priv with all the soft interface information
  * @if_incoming: the interface where the OGM has been received
  * @if_outgoing: the interface where the OGM has to be forwarded to
@@ -314,7 +315,7 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_v_ogm_forward - check conditions and forward an OGM to the given
+ * batadv_v_ogm_forward() - check conditions and forward an OGM to the given
  * outgoing interface
  * @bat_priv: the bat priv with all the soft interface information
  * @ogm_received: previously received OGM to be forwarded
@@ -405,7 +406,7 @@ out:
 }
 
 /**
- * batadv_v_ogm_metric_update - update route metric based on OGM
+ * batadv_v_ogm_metric_update() - update route metric based on OGM
  * @bat_priv: the bat priv with all the soft interface information
  * @ogm2: OGM2 structure
  * @orig_node: Originator structure for which the OGM has been received
@@ -490,7 +491,7 @@ out:
 }
 
 /**
- * batadv_v_ogm_route_update - update routes based on OGM
+ * batadv_v_ogm_route_update() - update routes based on OGM
  * @bat_priv: the bat priv with all the soft interface information
  * @ethhdr: the Ethernet header of the OGM2
  * @ogm2: OGM2 structure
@@ -590,7 +591,7 @@ out:
 }
 
 /**
- * batadv_v_ogm_process_per_outif - process a batman v OGM for an outgoing if
+ * batadv_v_ogm_process_per_outif() - process a batman v OGM for an outgoing if
  * @bat_priv: the bat priv with all the soft interface information
  * @ethhdr: the Ethernet header of the OGM2
  * @ogm2: OGM2 structure
@@ -639,7 +640,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_v_ogm_aggr_packet - checks if there is another OGM aggregated
+ * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated
  * @buff_pos: current position in the skb
  * @packet_len: total length of the skb
  * @tvlv_len: tvlv length of the previously considered OGM
@@ -659,7 +660,7 @@ static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
 }
 
 /**
- * batadv_v_ogm_process - process an incoming batman v OGM
+ * batadv_v_ogm_process() - process an incoming batman v OGM
  * @skb: the skb containing the OGM
  * @ogm_offset: offset to the OGM which should be processed (for aggregates)
  * @if_incoming: the interface where this packet was receved
@@ -787,7 +788,7 @@ out:
 }
 
 /**
- * batadv_v_ogm_packet_recv - OGM2 receiving handler
+ * batadv_v_ogm_packet_recv() - OGM2 receiving handler
  * @skb: the received OGM
  * @if_incoming: the interface where this OGM has been received
  *
@@ -851,7 +852,7 @@ free_skb:
 }
 
 /**
- * batadv_v_ogm_init - initialise the OGM2 engine
+ * batadv_v_ogm_init() - initialise the OGM2 engine
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: 0 on success or a negative error code in case of failure
@@ -884,7 +885,7 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_v_ogm_free - free OGM private resources
+ * batadv_v_ogm_free() - free OGM private resources
  * @bat_priv: the bat priv with all the soft interface information
  */
 void batadv_v_ogm_free(struct batadv_priv *bat_priv)
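
Every hunk in this file makes the same mechanical change: the kernel-doc summary line gains "()" after the function name, the form preferred by the kernel-doc documentation. A hypothetical before/after sketch of the convention (example_put() is invented for illustration, not part of this patch):

/* old style */
/**
 * example_put - decrement a refcounter and possibly release the object
 * @obj: object to be released
 */

/* preferred style */
/**
 * example_put() - decrement a refcounter and possibly release the object
 * @obj: object to be released
 */
void example_put(struct example *obj);
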
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 2068770b542d..6a4c14ccc3c6 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 2b070c7e31da..bdc1ef06e05b 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
@@ -32,7 +33,7 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
 }
 
 /**
- * batadv_bit_get_packet - receive and process one packet within the sequence
+ * batadv_bit_get_packet() - receive and process one packet within the sequence
  * number window
  * @priv: the bat priv with all the soft interface information
  * @seq_bits: pointer to the sequence number receive packet
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index cc262c9d97e0..ca9d0753dd6b 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
@@ -26,7 +27,7 @@
 #include <linux/types.h>
 
 /**
- * batadv_test_bit - check if bit is set in the current window
+ * batadv_test_bit() - check if bit is set in the current window
  *
  * @seq_bits: pointer to the sequence number receive packet
  * @last_seqno: latest sequence number in seq_bits
@@ -46,7 +47,12 @@ static inline bool batadv_test_bit(const unsigned long *seq_bits,
 	return test_bit(diff, seq_bits) != 0;
 }
 
-/* turn corresponding bit on, so we can remember that we got the packet */
+/**
+ * batadv_set_bit() - Turn corresponding bit on, so we can remember that we got
+ * the packet
+ * @seq_bits: bitmap of the packet receive window
+ * @n: relative sequence number of newly received packet
+ */
 static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
 {
 	/* if too old, just drop it */
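
batadv_test_bit() and batadv_set_bit() maintain a sliding receive window: bit n of @seq_bits records whether the packet n sequence numbers behind the newest one has been seen. A minimal userspace sketch of the same bookkeeping, assuming a single 64-bit word as the window (the kernel uses a multi-word bitmap sized by its own window constant and the kernel bitops):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WINDOW_SIZE 64 /* assumption; stands in for the kernel's window size */

/* check if the bit for seqno is set in the current window */
static bool test_seq_bit(const uint64_t *seq_bits, uint32_t last_seqno,
			 uint32_t seqno)
{
	int32_t diff = (int32_t)(last_seqno - seqno);

	if (diff < 0 || diff >= WINDOW_SIZE)
		return false; /* outside the window: treat as not seen */
	return (*seq_bits >> diff) & 1;
}

/* turn the corresponding bit on, so we remember that we got the packet */
static void set_seq_bit(uint64_t *seq_bits, int32_t n)
{
	if (n < 0 || n >= WINDOW_SIZE)
		return; /* too old, just drop it */
	*seq_bits |= 1ULL << n;
}

int main(void)
{
	uint64_t window = 0;
	uint32_t last_seqno = 100;

	set_seq_bit(&window, 0); /* packet 100 received */
	set_seq_bit(&window, 3); /* packet 97 received */
	printf("%d\n", test_seq_bit(&window, last_seqno, 97)); /* 1 */
	printf("%d\n", test_seq_bit(&window, last_seqno, 99)); /* 0 */
	return 0;
}
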
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index cdd8e8e4df0b..fad47853ad3c 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
@@ -24,7 +25,7 @@
 #include <linux/crc16.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
@@ -49,6 +50,7 @@
 #include <net/genetlink.h>
 #include <net/netlink.h>
 #include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
@@ -56,7 +58,6 @@
 #include "log.h"
 #include "netlink.h"
 #include "originator.h"
-#include "packet.h"
 #include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
@@ -69,7 +70,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv,
 			       struct batadv_bla_backbone_gw *backbone_gw);
 
 /**
- * batadv_choose_claim - choose the right bucket for a claim.
+ * batadv_choose_claim() - choose the right bucket for a claim.
  * @data: data to hash
  * @size: size of the hash table
  *
@@ -87,7 +88,7 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
 }
 
 /**
- * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway.
+ * batadv_choose_backbone_gw() - choose the right bucket for a backbone gateway.
  * @data: data to hash
  * @size: size of the hash table
  *
@@ -105,7 +106,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
 }
 
 /**
- * batadv_compare_backbone_gw - compare address and vid of two backbone gws
+ * batadv_compare_backbone_gw() - compare address and vid of two backbone gws
  * @node: list node of the first entry to compare
  * @data2: pointer to the second backbone gateway
  *
@@ -129,7 +130,7 @@ static bool batadv_compare_backbone_gw(const struct hlist_node *node,
 }
 
 /**
- * batadv_compare_claim - compare address and vid of two claims
+ * batadv_compare_claim() - compare address and vid of two claims
  * @node: list node of the first entry to compare
  * @data2: pointer to the second claims
  *
@@ -153,7 +154,7 @@ static bool batadv_compare_claim(const struct hlist_node *node,
 }
 
 /**
- * batadv_backbone_gw_release - release backbone gw from lists and queue for
+ * batadv_backbone_gw_release() - release backbone gw from lists and queue for
  * free after rcu grace period
  * @ref: kref pointer of the backbone gw
  */
@@ -168,7 +169,7 @@ static void batadv_backbone_gw_release(struct kref *ref)
 }
 
 /**
- * batadv_backbone_gw_put - decrement the backbone gw refcounter and possibly
+ * batadv_backbone_gw_put() - decrement the backbone gw refcounter and possibly
  * release it
  * @backbone_gw: backbone gateway to be free'd
  */
@@ -178,8 +179,8 @@ static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
 }
 
 /**
- * batadv_claim_release - release claim from lists and queue for free after rcu
- * grace period
+ * batadv_claim_release() - release claim from lists and queue for free after
+ * rcu grace period
  * @ref: kref pointer of the claim
  */
 static void batadv_claim_release(struct kref *ref)
@@ -204,8 +205,7 @@ static void batadv_claim_release(struct kref *ref)
 }
 
 /**
- * batadv_claim_put - decrement the claim refcounter and possibly
- * release it
+ * batadv_claim_put() - decrement the claim refcounter and possibly release it
  * @claim: claim to be free'd
  */
 static void batadv_claim_put(struct batadv_bla_claim *claim)
@@ -214,7 +214,7 @@ static void batadv_claim_put(struct batadv_bla_claim *claim)
 }
 
 /**
- * batadv_claim_hash_find - looks for a claim in the claim hash
+ * batadv_claim_hash_find() - looks for a claim in the claim hash
  * @bat_priv: the bat priv with all the soft interface information
  * @data: search data (may be local/static data)
  *
@@ -253,7 +253,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_backbone_hash_find - looks for a backbone gateway in the hash
+ * batadv_backbone_hash_find() - looks for a backbone gateway in the hash
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the address of the originator
  * @vid: the VLAN ID
@@ -297,7 +297,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
 }
 
 /**
- * batadv_bla_del_backbone_claims - delete all claims for a backbone
+ * batadv_bla_del_backbone_claims() - delete all claims for a backbone
  * @backbone_gw: backbone gateway where the claims should be removed
  */
 static void
@@ -337,7 +337,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
 }
 
 /**
- * batadv_bla_send_claim - sends a claim frame according to the provided info
+ * batadv_bla_send_claim() - sends a claim frame according to the provided info
  * @bat_priv: the bat priv with all the soft interface information
  * @mac: the mac address to be announced within the claim
  * @vid: the VLAN ID
@@ -457,7 +457,7 @@ out:
 }
 
 /**
- * batadv_bla_loopdetect_report - worker for reporting the loop
+ * batadv_bla_loopdetect_report() - worker for reporting the loop
  * @work: work queue item
  *
  * Throws an uevent, as the loopdetect check function can't do that itself
@@ -487,7 +487,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work)
 }
 
 /**
- * batadv_bla_get_backbone_gw - finds or creates a backbone gateway
+ * batadv_bla_get_backbone_gw() - finds or creates a backbone gateway
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the mac address of the originator
  * @vid: the VLAN ID
@@ -560,7 +560,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
 }
 
 /**
- * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN
+ * batadv_bla_update_own_backbone_gw() - updates the own backbone gw for a VLAN
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the selected primary interface
  * @vid: VLAN identifier
@@ -586,7 +586,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_answer_request - answer a bla request by sending own claims
+ * batadv_bla_answer_request() - answer a bla request by sending own claims
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: interface where the request came on
  * @vid: the vid where the request came on
@@ -636,7 +636,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_send_request - send a request to repeat claims
+ * batadv_bla_send_request() - send a request to repeat claims
  * @backbone_gw: the backbone gateway from whom we are out of sync
 *
 * When the crc is wrong, ask the backbone gateway for a full table update.
@@ -663,7 +663,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
 }
 
 /**
- * batadv_bla_send_announce - Send an announcement frame
+ * batadv_bla_send_announce() - Send an announcement frame
  * @bat_priv: the bat priv with all the soft interface information
  * @backbone_gw: our backbone gateway which should be announced
  */
@@ -684,7 +684,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_add_claim - Adds a claim in the claim hash
+ * batadv_bla_add_claim() - Adds a claim in the claim hash
  * @bat_priv: the bat priv with all the soft interface information
  * @mac: the mac address of the claim
  * @vid: the VLAN ID of the frame
@@ -774,7 +774,7 @@ claim_free_ref:
 }
 
 /**
- * batadv_bla_claim_get_backbone_gw - Get valid reference for backbone_gw of
+ * batadv_bla_claim_get_backbone_gw() - Get valid reference for backbone_gw of
  * claim
  * @claim: claim whose backbone_gw should be returned
  *
@@ -794,7 +794,7 @@ batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim)
 }
 
 /**
- * batadv_bla_del_claim - delete a claim from the claim hash
+ * batadv_bla_del_claim() - delete a claim from the claim hash
  * @bat_priv: the bat priv with all the soft interface information
  * @mac: mac address of the claim to be removed
  * @vid: VLAN id for the claim to be removed
@@ -822,7 +822,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_handle_announce - check for ANNOUNCE frame
+ * batadv_handle_announce() - check for ANNOUNCE frame
  * @bat_priv: the bat priv with all the soft interface information
  * @an_addr: announcement mac address (ARP Sender HW address)
  * @backbone_addr: originator address of the sender (Ethernet source MAC)
@@ -880,7 +880,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
 }
 
 /**
- * batadv_handle_request - check for REQUEST frame
+ * batadv_handle_request() - check for REQUEST frame
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the primary hard interface of this batman soft interface
  * @backbone_addr: backbone address to be requested (ARP sender HW MAC)
@@ -913,7 +913,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_handle_unclaim - check for UNCLAIM frame
+ * batadv_handle_unclaim() - check for UNCLAIM frame
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the primary hard interface of this batman soft interface
  * @backbone_addr: originator address of the backbone (Ethernet source)
@@ -951,7 +951,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_handle_claim - check for CLAIM frame
+ * batadv_handle_claim() - check for CLAIM frame
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the primary hard interface of this batman soft interface
  * @backbone_addr: originator address of the backbone (Ethernet Source)
@@ -988,7 +988,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_check_claim_group - check for claim group membership
+ * batadv_check_claim_group() - check for claim group membership
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the primary interface of this batman interface
  * @hw_src: the Hardware source in the ARP Header
@@ -1063,7 +1063,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_process_claim - Check if this is a claim frame, and process it
+ * batadv_bla_process_claim() - Check if this is a claim frame, and process it
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the primary hard interface of this batman soft interface
  * @skb: the frame to be checked
@@ -1205,7 +1205,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or
+ * batadv_bla_purge_backbone_gw() - Remove backbone gateways after a timeout or
  * immediately
  * @bat_priv: the bat priv with all the soft interface information
  * @now: whether the whole hash shall be wiped now
@@ -1258,7 +1258,7 @@ purge_now:
 }
 
 /**
- * batadv_bla_purge_claims - Remove claims after a timeout or immediately
+ * batadv_bla_purge_claims() - Remove claims after a timeout or immediately
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the selected primary interface, may be NULL if now is set
  * @now: whether the whole hash shall be wiped now
@@ -1316,7 +1316,7 @@ skip:
 }
 
 /**
- * batadv_bla_update_orig_address - Update the backbone gateways when the own
+ * batadv_bla_update_orig_address() - Update the backbone gateways when the own
  * originator address changes
  * @bat_priv: the bat priv with all the soft interface information
  * @primary_if: the new selected primary_if
@@ -1372,7 +1372,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_send_loopdetect - send a loopdetect frame
+ * batadv_bla_send_loopdetect() - send a loopdetect frame
  * @bat_priv: the bat priv with all the soft interface information
  * @backbone_gw: the backbone gateway for which a loop should be detected
  *
@@ -1392,7 +1392,7 @@ batadv_bla_send_loopdetect(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_bla_status_update - purge bla interfaces if necessary
+ * batadv_bla_status_update() - purge bla interfaces if necessary
  * @net_dev: the soft interface net device
 */
 void batadv_bla_status_update(struct net_device *net_dev)
@@ -1412,7 +1412,7 @@ void batadv_bla_status_update(struct net_device *net_dev)
 }
 
 /**
- * batadv_bla_periodic_work - performs periodic bla work
+ * batadv_bla_periodic_work() - performs periodic bla work
  * @work: kernel work struct
  *
  * periodic work to do:
@@ -1517,7 +1517,7 @@ static struct lock_class_key batadv_claim_hash_lock_class_key;
 static struct lock_class_key batadv_backbone_hash_lock_class_key;
 
 /**
- * batadv_bla_init - initialize all bla structures
+ * batadv_bla_init() - initialize all bla structures
  * @bat_priv: the bat priv with all the soft interface information
  *
 * Return: 0 on success, < 0 on error.
@@ -1579,7 +1579,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup.
+ * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: contains the bcast_packet to be checked
  *
@@ -1652,7 +1652,7 @@ out:
 }
 
 /**
- * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for
+ * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for
  * the VLAN identified by vid.
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: originator mac address
@@ -1692,7 +1692,7 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
 }
 
 /**
- * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN.
+ * batadv_bla_is_backbone_gw() - check if originator is a backbone gw for a VLAN
  * @skb: the frame to be checked
 * @orig_node: the orig_node of the frame
 * @hdr_size: maximum length of the frame
@@ -1726,7 +1726,7 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
 }
 
 /**
- * batadv_bla_free - free all bla structures
+ * batadv_bla_free() - free all bla structures
  * @bat_priv: the bat priv with all the soft interface information
  *
 * for softinterface free or module unload
@@ -1753,7 +1753,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_bla_loopdetect_check - check and handle a detected loop
+ * batadv_bla_loopdetect_check() - check and handle a detected loop
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the packet to check
  * @primary_if: interface where the request came on
@@ -1802,7 +1802,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
 }
 
 /**
- * batadv_bla_rx - check packets coming from the mesh.
+ * batadv_bla_rx() - check packets coming from the mesh.
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the frame to be checked
  * @vid: the VLAN ID of the frame
@@ -1914,7 +1914,7 @@ out:
 }
 
 /**
- * batadv_bla_tx - check packets going into the mesh
+ * batadv_bla_tx() - check packets going into the mesh
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the frame to be checked
  * @vid: the VLAN ID of the frame
@@ -2022,7 +2022,7 @@ out:
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
+ * batadv_bla_claim_table_seq_print_text() - print the claim table in a seq file
  * @seq: seq file to print on
  * @offset: not used
  *
@@ -2084,7 +2084,7 @@ out:
 #endif
 
 /**
- * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * batadv_bla_claim_dump_entry() - dump one entry of the claim table
  * to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
@@ -2143,7 +2143,7 @@ out:
 }
 
 /**
- * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * batadv_bla_claim_dump_bucket() - dump one bucket of the claim table
  * to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
@@ -2180,7 +2180,7 @@ unlock:
 }
 
 /**
- * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * batadv_bla_claim_dump() - dump claim table to a netlink socket
  * @msg: buffer for the message
  * @cb: callback structure containing arguments
  *
@@ -2247,8 +2247,8 @@ out:
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
- * file
+ * batadv_bla_backbone_table_seq_print_text() - print the backbone table in a
+ * seq file
  * @seq: seq file to print on
  * @offset: not used
  *
@@ -2312,8 +2312,8 @@ out:
 #endif
 
 /**
- * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
- * to a netlink socket
+ * batadv_bla_backbone_dump_entry() - dump one entry of the backbone table to a
+ * netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
  * @seq: Sequence number of netlink message
@@ -2373,8 +2373,8 @@ out:
 }
 
 /**
- * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
- * to a netlink socket
+ * batadv_bla_backbone_dump_bucket() - dump one bucket of the backbone table to
+ * a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
  * @seq: Sequence number of netlink message
@@ -2410,7 +2410,7 @@ unlock:
 }
 
 /**
- * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * batadv_bla_backbone_dump() - dump backbone table to a netlink socket
  * @msg: buffer for the message
  * @cb: callback structure containing arguments
  *
@@ -2477,7 +2477,7 @@ out:
 
 #ifdef CONFIG_BATMAN_ADV_DAT
 /**
- * batadv_bla_check_claim - check if address is claimed
+ * batadv_bla_check_claim() - check if address is claimed
  *
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: mac address of which the claim status is checked
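
A pattern worth noting across these helpers: every *_put() kernel-doc above ("decrement the refcounter and possibly release it") pairs with a *_release() callback that runs only when the last reference drops, via kref_put() in the kernel. A self-contained userspace analogue of that idiom (names and types are illustrative; the kernel versions take a struct kref and recover the object with container_of()):

#include <stdatomic.h>
#include <stdlib.h>

struct claim {
	atomic_int refcount;
	/* ... claim data ... */
};

/* release(): called exactly once, when the last reference is dropped */
static void claim_release(struct claim *claim)
{
	free(claim);
}

/* put(): decrement the refcounter and possibly release the object */
static void claim_put(struct claim *claim)
{
	if (atomic_fetch_sub(&claim->refcount, 1) == 1)
		claim_release(claim);
}

/* get(): take an additional reference */
static struct claim *claim_get(struct claim *claim)
{
	atomic_fetch_add(&claim->refcount, 1);
	return claim;
}

int main(void)
{
	struct claim *c = calloc(1, sizeof(*c));

	atomic_init(&c->refcount, 1); /* creator holds the first reference */
	claim_get(c);                 /* a second user takes a reference */
	claim_put(c);                 /* ... and drops it again */
	claim_put(c);                 /* last put frees the object */
	return 0;
}
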
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 234775748b8e..b27571abcd2f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
@@ -30,8 +31,8 @@ struct seq_file;
 struct sk_buff;
 
 /**
- * batadv_bla_is_loopdetect_mac - check if the mac address is from a loop detect
- * frame sent by bridge loop avoidance
+ * batadv_bla_is_loopdetect_mac() - check if the mac address is from a loop
+ * detect frame sent by bridge loop avoidance
  * @mac: mac address to check
  *
 * Return: true if the it looks like a loop detect frame
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index e32ad47c6efd..21d1189957a7 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
@@ -25,7 +26,6 @@
 #include <linux/fs.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
-#include <linux/sched.h> /* for linux/wait.h */
 #include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <linux/stddef.h>
@@ -66,8 +66,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file)
 }
 
 /**
- * batadv_originators_hardif_open - handles debugfs output for the
- * originator table of an hard interface
+ * batadv_originators_hardif_open() - handles debugfs output for the originator
+ * table of an hard interface
  * @inode: inode pointer to debugfs file
  * @file: pointer to the seq_file
  *
@@ -117,7 +117,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
 
 #ifdef CONFIG_BATMAN_ADV_DAT
 /**
- * batadv_dat_cache_open - Prepare file handler for reads from dat_chache
+ * batadv_dat_cache_open() - Prepare file handler for reads from dat_chache
  * @inode: inode which was opened
  * @file: file handle to be initialized
  *
@@ -154,7 +154,7 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
 
 #ifdef CONFIG_BATMAN_ADV_MCAST
 /**
- * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags
+ * batadv_mcast_flags_open() - prepare file handler for reads from mcast_flags
  * @inode: inode which was opened
  * @file: file handle to be initialized
  *
@@ -259,6 +259,9 @@ static struct batadv_debuginfo *batadv_hardif_debuginfos[] = {
 	NULL,
 };
 
+/**
+ * batadv_debugfs_init() - Initialize soft interface independent debugfs entries
+ */
 void batadv_debugfs_init(void)
 {
 	struct batadv_debuginfo **bat_debug;
@@ -289,6 +292,9 @@ err:
 	batadv_debugfs = NULL;
 }
 
+/**
+ * batadv_debugfs_destroy() - Remove all debugfs entries
+ */
 void batadv_debugfs_destroy(void)
 {
 	debugfs_remove_recursive(batadv_debugfs);
@@ -296,7 +302,7 @@ void batadv_debugfs_destroy(void)
 }
 
 /**
- * batadv_debugfs_add_hardif - creates the base directory for a hard interface
+ * batadv_debugfs_add_hardif() - creates the base directory for a hard interface
  * in debugfs.
  * @hard_iface: hard interface which should be added.
  *
@@ -338,7 +344,7 @@ out:
 }
 
 /**
- * batadv_debugfs_del_hardif - delete the base directory for a hard interface
+ * batadv_debugfs_del_hardif() - delete the base directory for a hard interface
  * in debugfs.
  * @hard_iface: hard interface which is deleted.
  */
@@ -355,6 +361,12 @@ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
 	}
 }
 
+/**
+ * batadv_debugfs_add_meshif() - Initialize interface dependent debugfs entries
+ * @dev: netdev struct of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_debugfs_add_meshif(struct net_device *dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -401,6 +413,10 @@ out:
 	return -ENOMEM;
 }
 
+/**
+ * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries
+ * @dev: netdev struct of the soft interface
+ */
 void batadv_debugfs_del_meshif(struct net_device *dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
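
The lifecycle documented by the new kernel-doc comments is plain debugfs usage: create a directory tree at init, tear it down recursively at exit. A stripped-down module sketch of that pattern, with a made-up directory name (the NULL check on the returned dentry matches the debugfs API of this kernel generation):

#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/module.h>

static struct dentry *example_debugfs;

static int __init example_init(void)
{
	/* creates /sys/kernel/debug/example_dir */
	example_debugfs = debugfs_create_dir("example_dir", NULL);
	if (!example_debugfs)
		return -ENOMEM;
	return 0;
}

static void __exit example_exit(void)
{
	/* removes the directory and everything created below it */
	debugfs_remove_recursive(example_debugfs);
	example_debugfs = NULL;
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
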
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 9c5d4a65b98c..90a08d35c501 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 760c0de72582..9703c791ffc5 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
@@ -23,7 +24,7 @@
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
@@ -55,7 +56,7 @@
 static void batadv_dat_purge(struct work_struct *work);
 
 /**
- * batadv_dat_start_timer - initialise the DAT periodic worker
+ * batadv_dat_start_timer() - initialise the DAT periodic worker
  * @bat_priv: the bat priv with all the soft interface information
  */
 static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
@@ -66,7 +67,7 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_dat_entry_release - release dat_entry from lists and queue for free
+ * batadv_dat_entry_release() - release dat_entry from lists and queue for free
  * after rcu grace period
  * @ref: kref pointer of the dat_entry
  */
@@ -80,7 +81,7 @@ static void batadv_dat_entry_release(struct kref *ref)
 }
 
 /**
- * batadv_dat_entry_put - decrement the dat_entry refcounter and possibly
+ * batadv_dat_entry_put() - decrement the dat_entry refcounter and possibly
  * release it
  * @dat_entry: dat_entry to be free'd
  */
@@ -90,7 +91,7 @@ static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
 }
 
 /**
- * batadv_dat_to_purge - check whether a dat_entry has to be purged or not
+ * batadv_dat_to_purge() - check whether a dat_entry has to be purged or not
  * @dat_entry: the entry to check
  *
 * Return: true if the entry has to be purged now, false otherwise.
@@ -102,7 +103,7 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
 }
 
 /**
- * __batadv_dat_purge - delete entries from the DAT local storage
+ * __batadv_dat_purge() - delete entries from the DAT local storage
  * @bat_priv: the bat priv with all the soft interface information
  * @to_purge: function in charge to decide whether an entry has to be purged or
  *            not. This function takes the dat_entry as argument and has to
@@ -145,8 +146,8 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_dat_purge - periodic task that deletes old entries from the local DAT
- * hash table
+ * batadv_dat_purge() - periodic task that deletes old entries from the local
+ * DAT hash table
  * @work: kernel work struct
  */
 static void batadv_dat_purge(struct work_struct *work)
@@ -164,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work)
 }
 
 /**
- * batadv_compare_dat - comparing function used in the local DAT hash table
+ * batadv_compare_dat() - comparing function used in the local DAT hash table
  * @node: node in the local table
  * @data2: second object to compare the node to
  *
@@ -179,7 +180,7 @@ static bool batadv_compare_dat(const struct hlist_node *node, const void *data2)
 }
 
 /**
- * batadv_arp_hw_src - extract the hw_src field from an ARP packet
+ * batadv_arp_hw_src() - extract the hw_src field from an ARP packet
  * @skb: ARP packet
  * @hdr_size: size of the possible header before the ARP packet
  *
@@ -196,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
 }
 
 /**
- * batadv_arp_ip_src - extract the ip_src field from an ARP packet
+ * batadv_arp_ip_src() - extract the ip_src field from an ARP packet
  * @skb: ARP packet
  * @hdr_size: size of the possible header before the ARP packet
  *
@@ -208,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
 }
 
 /**
- * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet
+ * batadv_arp_hw_dst() - extract the hw_dst field from an ARP packet
  * @skb: ARP packet
  * @hdr_size: size of the possible header before the ARP packet
  *
@@ -220,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
 }
 
 /**
- * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet
+ * batadv_arp_ip_dst() - extract the ip_dst field from an ARP packet
  * @skb: ARP packet
  * @hdr_size: size of the possible header before the ARP packet
  *
@@ -232,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
 }
 
 /**
- * batadv_hash_dat - compute the hash value for an IP address
+ * batadv_hash_dat() - compute the hash value for an IP address
  * @data: data to hash
  * @size: size of the hash table
  *
@@ -267,7 +268,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
 }
 
 /**
- * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash
+ * batadv_dat_entry_hash_find() - look for a given dat_entry in the local hash
  * table
  * @bat_priv: the bat priv with all the soft interface information
  * @ip: search key
@@ -310,7 +311,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
 }
 
 /**
- * batadv_dat_entry_add - add a new dat entry or update it if already exists
+ * batadv_dat_entry_add() - add a new dat entry or update it if already exists
  * @bat_priv: the bat priv with all the soft interface information
  * @ip: ipv4 to add/edit
  * @mac_addr: mac address to assign to the given ipv4
@@ -367,7 +368,8 @@ out:
 #ifdef CONFIG_BATMAN_ADV_DEBUG
 
 /**
- * batadv_dbg_arp - print a debug message containing all the ARP packet details
+ * batadv_dbg_arp() - print a debug message containing all the ARP packet
+ * details
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: ARP packet
  * @hdr_size: size of the possible header before the ARP packet
@@ -448,7 +450,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
 #endif /* CONFIG_BATMAN_ADV_DEBUG */
 
 /**
- * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate
+ * batadv_is_orig_node_eligible() - check whether a node can be a DHT candidate
  * @res: the array with the already selected candidates
  * @select: number of already selected candidates
  * @tmp_max: address of the currently evaluated node
@@ -502,7 +504,7 @@ out:
 }
 
 /**
- * batadv_choose_next_candidate - select the next DHT candidate
+ * batadv_choose_next_candidate() - select the next DHT candidate
  * @bat_priv: the bat priv with all the soft interface information
  * @cands: candidates array
 * @select: number of candidates already present in the array
@@ -566,8 +568,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_dat_select_candidates - select the nodes which the DHT message has to
- * be sent to
+ * batadv_dat_select_candidates() - select the nodes which the DHT message has
+ * to be sent to
  * @bat_priv: the bat priv with all the soft interface information
  * @ip_dst: ipv4 to look up in the DHT
  * @vid: VLAN identifier
@@ -612,7 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
 }
 
 /**
- * batadv_dat_send_data - send a payload to the selected candidates
+ * batadv_dat_send_data() - send a payload to the selected candidates
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: payload to send
  * @ip: the DHT key
@@ -688,7 +690,7 @@ out:
 }
 
 /**
- * batadv_dat_tvlv_container_update - update the dat tvlv container after dat
+ * batadv_dat_tvlv_container_update() - update the dat tvlv container after dat
  * setting change
  * @bat_priv: the bat priv with all the soft interface information
  */
@@ -710,7 +712,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_dat_status_update - update the dat tvlv container after dat
+ * batadv_dat_status_update() - update the dat tvlv container after dat
  * setting change
  * @net_dev: the soft interface net device
  */
@@ -722,7 +724,7 @@ void batadv_dat_status_update(struct net_device *net_dev)
 }
 
 /**
- * batadv_dat_tvlv_ogm_handler_v1 - process incoming dat tvlv container
+ * batadv_dat_tvlv_ogm_handler_v1() - process incoming dat tvlv container
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node of the ogm
  * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -741,7 +743,7 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_dat_hash_free - free the local DAT hash table
+ * batadv_dat_hash_free() - free the local DAT hash table
  * @bat_priv: the bat priv with all the soft interface information
  */
 static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
@@ -757,7 +759,7 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_dat_init - initialise the DAT internals
+ * batadv_dat_init() - initialise the DAT internals
  * @bat_priv: the bat priv with all the soft interface information
  *
 * Return: 0 in case of success, a negative error code otherwise
@@ -782,7 +784,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_dat_free - free the DAT internals
+ * batadv_dat_free() - free the DAT internals
  * @bat_priv: the bat priv with all the soft interface information
  */
 void batadv_dat_free(struct batadv_priv *bat_priv)
@@ -797,7 +799,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_dat_cache_seq_print_text - print the local DAT hash table
+ * batadv_dat_cache_seq_print_text() - print the local DAT hash table
  * @seq: seq file to print on
  * @offset: not used
  *
@@ -850,7 +852,7 @@ out:
850#endif 852#endif
851 853
852/** 854/**
853 * batadv_arp_get_type - parse an ARP packet and gets the type 855 * batadv_arp_get_type() - parse an ARP packet and gets the type
854 * @bat_priv: the bat priv with all the soft interface information 856 * @bat_priv: the bat priv with all the soft interface information
855 * @skb: packet to analyse 857 * @skb: packet to analyse
856 * @hdr_size: size of the possible header before the ARP packet in the skb 858 * @hdr_size: size of the possible header before the ARP packet in the skb
@@ -924,7 +926,7 @@ out:
924} 926}
925 927
926/** 928/**
927 * batadv_dat_get_vid - extract the VLAN identifier from skb if any 929 * batadv_dat_get_vid() - extract the VLAN identifier from skb if any
928 * @skb: the buffer containing the packet to extract the VID from 930 * @skb: the buffer containing the packet to extract the VID from
929 * @hdr_size: the size of the batman-adv header encapsulating the packet 931 * @hdr_size: the size of the batman-adv header encapsulating the packet
930 * 932 *
@@ -950,7 +952,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
950} 952}
951 953
952/** 954/**
953 * batadv_dat_arp_create_reply - create an ARP Reply 955 * batadv_dat_arp_create_reply() - create an ARP Reply
954 * @bat_priv: the bat priv with all the soft interface information 956 * @bat_priv: the bat priv with all the soft interface information
955 * @ip_src: ARP sender IP 957 * @ip_src: ARP sender IP
956 * @ip_dst: ARP target IP 958 * @ip_dst: ARP target IP
@@ -985,7 +987,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src,
985} 987}
986 988
987/** 989/**
988 * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to 990 * batadv_dat_snoop_outgoing_arp_request() - snoop the ARP request and try to
989 * answer using DAT 991 * answer using DAT
990 * @bat_priv: the bat priv with all the soft interface information 992 * @bat_priv: the bat priv with all the soft interface information
991 * @skb: packet to check 993 * @skb: packet to check
@@ -1083,7 +1085,7 @@ out:
1083} 1085}
1084 1086
1085/** 1087/**
1086 * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to 1088 * batadv_dat_snoop_incoming_arp_request() - snoop the ARP request and try to
1087 * answer using the local DAT storage 1089 * answer using the local DAT storage
1088 * @bat_priv: the bat priv with all the soft interface information 1090 * @bat_priv: the bat priv with all the soft interface information
1089 * @skb: packet to check 1091 * @skb: packet to check
@@ -1153,7 +1155,7 @@ out:
1153} 1155}
1154 1156
1155/** 1157/**
1156 * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT 1158 * batadv_dat_snoop_outgoing_arp_reply() - snoop the ARP reply and fill the DHT
1157 * @bat_priv: the bat priv with all the soft interface information 1159 * @bat_priv: the bat priv with all the soft interface information
1158 * @skb: packet to check 1160 * @skb: packet to check
1159 */ 1161 */
@@ -1193,8 +1195,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
1193} 1195}
1194 1196
1195/** 1197/**
1196 * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local 1198 * batadv_dat_snoop_incoming_arp_reply() - snoop the ARP reply and fill the
1197 * DAT storage only 1199 * local DAT storage only
1198 * @bat_priv: the bat priv with all the soft interface information 1200 * @bat_priv: the bat priv with all the soft interface information
1199 * @skb: packet to check 1201 * @skb: packet to check
1200 * @hdr_size: size of the encapsulation header 1202 * @hdr_size: size of the encapsulation header
@@ -1282,8 +1284,8 @@ out:
1282} 1284}
1283 1285
1284/** 1286/**
1285 * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped 1287 * batadv_dat_drop_broadcast_packet() - check if an ARP request has to be
1286 * (because the node has already obtained the reply via DAT) or not 1288 * dropped (because the node has already obtained the reply via DAT) or not
1287 * @bat_priv: the bat priv with all the soft interface information 1289 * @bat_priv: the bat priv with all the soft interface information
1288 * @forw_packet: the broadcast packet 1290 * @forw_packet: the broadcast packet
1289 * 1291 *
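
The snoop hooks documented in this file all share one control flow: classify the ARP packet, consult the DAT cache, then either answer locally or let the packet continue through the mesh. A minimal sketch of that flow, assuming the context of distributed-arp-table.c; sketch_dat_cache_hit() and sketch_send_local_reply() are hypothetical stand-ins for the cache lookup and the batadv_dat_arp_create_reply() transmit path:

    #include <linux/if_arp.h>
    #include <linux/skbuff.h>

    /* Hypothetical stand-ins; the real lookup walks the DAT hash and the
     * real reply is built via batadv_dat_arp_create_reply().
     */
    static bool sketch_dat_cache_hit(struct batadv_priv *bat_priv, __be32 ip)
    {
            return false;
    }

    static void sketch_send_local_reply(struct batadv_priv *bat_priv,
                                        struct sk_buff *skb)
    {
    }

    static bool sketch_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
                                                  struct sk_buff *skb)
    {
            __be32 ip_dst;
            u16 type;

            /* 1) classify: only ARP requests are candidates for a DAT answer */
            type = batadv_arp_get_type(bat_priv, skb, 0);
            if (type != ARPOP_REQUEST)
                    return false;

            /* 2) consult the cache for the requested IP */
            ip_dst = batadv_arp_ip_dst(skb, 0);
            if (!sketch_dat_cache_hit(bat_priv, ip_dst))
                    return false;   /* miss: let the request travel the mesh */

            /* 3) hit: answer locally; the caller consumes the request */
            sketch_send_local_reply(bat_priv, skb);
            return true;
    }
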
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index ec364a3c1c66..12897eb46268 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Antonio Quartulli 4 * Antonio Quartulli
@@ -23,9 +24,9 @@
23#include <linux/compiler.h> 24#include <linux/compiler.h>
24#include <linux/netdevice.h> 25#include <linux/netdevice.h>
25#include <linux/types.h> 26#include <linux/types.h>
27#include <uapi/linux/batadv_packet.h>
26 28
27#include "originator.h" 29#include "originator.h"
28#include "packet.h"
29 30
30struct seq_file; 31struct seq_file;
31struct sk_buff; 32struct sk_buff;
@@ -48,7 +49,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
48 struct batadv_forw_packet *forw_packet); 49 struct batadv_forw_packet *forw_packet);
49 50
50/** 51/**
51 * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node 52 * batadv_dat_init_orig_node_addr() - assign a DAT address to the orig_node
52 * @orig_node: the node to assign the DAT address to 53 * @orig_node: the node to assign the DAT address to
53 */ 54 */
54static inline void 55static inline void
@@ -61,7 +62,7 @@ batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
61} 62}
62 63
63/** 64/**
64 * batadv_dat_init_own_addr - assign a DAT address to the node itself 65 * batadv_dat_init_own_addr() - assign a DAT address to the node itself
65 * @bat_priv: the bat priv with all the soft interface information 66 * @bat_priv: the bat priv with all the soft interface information
66 * @primary_if: a pointer to the primary interface 67 * @primary_if: a pointer to the primary interface
67 */ 68 */
@@ -82,7 +83,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv);
82int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); 83int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
83 84
84/** 85/**
85 * batadv_dat_inc_counter - increment the correct DAT packet counter 86 * batadv_dat_inc_counter() - increment the correct DAT packet counter
86 * @bat_priv: the bat priv with all the soft interface information 87 * @bat_priv: the bat priv with all the soft interface information
87 * @subtype: the 4addr subtype of the packet to be counted 88 * @subtype: the 4addr subtype of the packet to be counted
88 * 89 *
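
The inline batadv_dat_init_orig_node_addr() documented above boils down to hashing the originator's MAC address into the DAT identifier space, so every node independently computes the same DHT position for a given originator. A rough sketch; the use of batadv_choose_orig() here is an assumption of this sketch, not confirmed by the hunk:

    static inline void sketch_dat_addr_from_mac(struct batadv_orig_node *orig_node)
    {
            u32 addr;

            /* Hash the MAC into [0, BATADV_DAT_ADDR_MAX]; every node derives
             * the same value, which is what makes the DHT placement work.
             */
            addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX);
            orig_node->dat_addr = (batadv_dat_addr_t)addr;
    }
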
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index ebe6e38934e4..22dde42fd80e 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Martin Hundebøll <martin@hundeboll.net> 4 * Martin Hundebøll <martin@hundeboll.net>
@@ -22,7 +23,7 @@
22#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
25#include <linux/fs.h> 26#include <linux/gfp.h>
26#include <linux/if_ether.h> 27#include <linux/if_ether.h>
27#include <linux/jiffies.h> 28#include <linux/jiffies.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
@@ -32,16 +33,16 @@
32#include <linux/slab.h> 33#include <linux/slab.h>
33#include <linux/spinlock.h> 34#include <linux/spinlock.h>
34#include <linux/string.h> 35#include <linux/string.h>
36#include <uapi/linux/batadv_packet.h>
35 37
36#include "hard-interface.h" 38#include "hard-interface.h"
37#include "originator.h" 39#include "originator.h"
38#include "packet.h"
39#include "routing.h" 40#include "routing.h"
40#include "send.h" 41#include "send.h"
41#include "soft-interface.h" 42#include "soft-interface.h"
42 43
43/** 44/**
44 * batadv_frag_clear_chain - delete entries in the fragment buffer chain 45 * batadv_frag_clear_chain() - delete entries in the fragment buffer chain
45 * @head: head of chain with entries. 46 * @head: head of chain with entries.
46 * @dropped: whether the chain is cleared because all fragments are dropped 47 * @dropped: whether the chain is cleared because all fragments are dropped
47 * 48 *
@@ -65,7 +66,7 @@ static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
65} 66}
66 67
67/** 68/**
68 * batadv_frag_purge_orig - free fragments associated to an orig 69 * batadv_frag_purge_orig() - free fragments associated to an orig
69 * @orig_node: originator to free fragments from 70 * @orig_node: originator to free fragments from
70 * @check_cb: optional function to tell if an entry should be purged 71 * @check_cb: optional function to tell if an entry should be purged
71 */ 72 */
@@ -89,7 +90,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
89} 90}
90 91
91/** 92/**
92 * batadv_frag_size_limit - maximum possible size of packet to be fragmented 93 * batadv_frag_size_limit() - maximum possible size of packet to be fragmented
93 * 94 *
94 * Return: the maximum size of payload that can be fragmented. 95 * Return: the maximum size of payload that can be fragmented.
95 */ 96 */
@@ -104,7 +105,7 @@ static int batadv_frag_size_limit(void)
104} 105}
105 106
106/** 107/**
107 * batadv_frag_init_chain - check and prepare fragment chain for new fragment 108 * batadv_frag_init_chain() - check and prepare fragment chain for new fragment
108 * @chain: chain in fragments table to init 109 * @chain: chain in fragments table to init
109 * @seqno: sequence number of the received fragment 110 * @seqno: sequence number of the received fragment
110 * 111 *
@@ -134,7 +135,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
134} 135}
135 136
136/** 137/**
137 * batadv_frag_insert_packet - insert a fragment into a fragment chain 138 * batadv_frag_insert_packet() - insert a fragment into a fragment chain
138 * @orig_node: originator that the fragment was received from 139 * @orig_node: originator that the fragment was received from
139 * @skb: skb to insert 140 * @skb: skb to insert
140 * @chain_out: list head to attach complete chains of fragments to 141 * @chain_out: list head to attach complete chains of fragments to
@@ -248,7 +249,7 @@ err:
248} 249}
249 250
250/** 251/**
251 * batadv_frag_merge_packets - merge a chain of fragments 252 * batadv_frag_merge_packets() - merge a chain of fragments
252 * @chain: head of chain with fragments 253 * @chain: head of chain with fragments
253 * 254 *
254 * Expand the first skb in the chain and copy the content of the remaining 255 * Expand the first skb in the chain and copy the content of the remaining
@@ -306,7 +307,7 @@ free:
306} 307}
307 308
308/** 309/**
309 * batadv_frag_skb_buffer - buffer fragment for later merge 310 * batadv_frag_skb_buffer() - buffer fragment for later merge
310 * @skb: skb to buffer 311 * @skb: skb to buffer
311 * @orig_node_src: originator that the skb is received from 312 * @orig_node_src: originator that the skb is received from
312 * 313 *
@@ -346,7 +347,7 @@ out_err:
346} 347}
347 348
348/** 349/**
349 * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged 350 * batadv_frag_skb_fwd() - forward fragments that would exceed MTU when merged
350 * @skb: skb to forward 351 * @skb: skb to forward
351 * @recv_if: interface that the skb is received on 352 * @recv_if: interface that the skb is received on
352 * @orig_node_src: originator that the skb is received from 353 * @orig_node_src: originator that the skb is received from
@@ -400,7 +401,7 @@ out:
400} 401}
401 402
402/** 403/**
403 * batadv_frag_create - create a fragment from skb 404 * batadv_frag_create() - create a fragment from skb
404 * @skb: skb to create fragment from 405 * @skb: skb to create fragment from
405 * @frag_head: header to use in new fragment 406 * @frag_head: header to use in new fragment
406 * @fragment_size: size of new fragment 407 * @fragment_size: size of new fragment
@@ -438,7 +439,7 @@ err:
438} 439}
439 440
440/** 441/**
441 * batadv_frag_send_packet - create up to 16 fragments from the passed skb 442 * batadv_frag_send_packet() - create up to 16 fragments from the passed skb
442 * @skb: skb to create fragments from 443 * @skb: skb to create fragments from
443 * @orig_node: final destination of the created fragments 444 * @orig_node: final destination of the created fragments
444 * @neigh_node: next-hop of the created fragments 445 * @neigh_node: next-hop of the created fragments
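
The sizing rule implied by batadv_frag_size_limit() and the "up to 16 fragments" note on batadv_frag_send_packet() is plain arithmetic: the payload must fit into at most 16 fragments of at most one fragment-payload MTU each. A back-of-the-envelope sketch; the constant name and helper are illustrative only:

    #include <linux/kernel.h>

    #define SKETCH_MAX_FRAGMENTS    16      /* "up to 16 fragments" */

    static bool sketch_can_fragment(unsigned int payload_len,
                                    unsigned int frag_payload_mtu)
    {
            unsigned int frags = DIV_ROUND_UP(payload_len, frag_payload_mtu);

            /* e.g. 20000 bytes over 1400-byte fragment payloads -> 15 frags */
            return frags <= SKETCH_MAX_FRAGMENTS;
    }
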
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 1a2d6c308745..138b22a1836a 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Martin Hundebøll <martin@hundeboll.net> 4 * Martin Hundebøll <martin@hundeboll.net>
@@ -39,7 +40,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
39 struct batadv_neigh_node *neigh_node); 40 struct batadv_neigh_node *neigh_node);
40 41
41/** 42/**
42 * batadv_frag_check_entry - check if a list of fragments has timed out 43 * batadv_frag_check_entry() - check if a list of fragments has timed out
43 * @frags_entry: table entry to check 44 * @frags_entry: table entry to check
44 * 45 *
45 * Return: true if the frags entry has timed out, false otherwise. 46 * Return: true if the frags entry has timed out, false otherwise.
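
batadv_frag_check_entry() comes down to a timestamp comparison in jiffies. A minimal sketch of such a check; the 10 s value and all names are assumptions, not taken from the hunk:

    #include <linux/jiffies.h>

    #define SKETCH_FRAG_TIMEOUT_MS  10000   /* assumed timeout */

    static bool sketch_frags_timed_out(unsigned long timestamp)
    {
            /* true once 'timestamp' lies more than the timeout in the past */
            return time_is_before_jiffies(timestamp +
                                          msecs_to_jiffies(SKETCH_FRAG_TIMEOUT_MS));
    }
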
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 10d521f0b17f..37fe9a644f22 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
@@ -22,7 +23,7 @@
22#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
25#include <linux/fs.h> 26#include <linux/gfp.h>
26#include <linux/if_ether.h> 27#include <linux/if_ether.h>
27#include <linux/if_vlan.h> 28#include <linux/if_vlan.h>
28#include <linux/in.h> 29#include <linux/in.h>
@@ -42,6 +43,7 @@
42#include <linux/stddef.h> 43#include <linux/stddef.h>
43#include <linux/udp.h> 44#include <linux/udp.h>
44#include <net/sock.h> 45#include <net/sock.h>
46#include <uapi/linux/batadv_packet.h>
45#include <uapi/linux/batman_adv.h> 47#include <uapi/linux/batman_adv.h>
46 48
47#include "gateway_common.h" 49#include "gateway_common.h"
@@ -49,7 +51,6 @@
49#include "log.h" 51#include "log.h"
50#include "netlink.h" 52#include "netlink.h"
51#include "originator.h" 53#include "originator.h"
52#include "packet.h"
53#include "routing.h" 54#include "routing.h"
54#include "soft-interface.h" 55#include "soft-interface.h"
55#include "sysfs.h" 56#include "sysfs.h"
@@ -68,8 +69,8 @@
68#define BATADV_DHCP_CHADDR_OFFSET 28 69#define BATADV_DHCP_CHADDR_OFFSET 28
69 70
70/** 71/**
71 * batadv_gw_node_release - release gw_node from lists and queue for free after 72 * batadv_gw_node_release() - release gw_node from lists and queue for free
72 * rcu grace period 73 * after rcu grace period
73 * @ref: kref pointer of the gw_node 74 * @ref: kref pointer of the gw_node
74 */ 75 */
75static void batadv_gw_node_release(struct kref *ref) 76static void batadv_gw_node_release(struct kref *ref)
@@ -83,7 +84,8 @@ static void batadv_gw_node_release(struct kref *ref)
83} 84}
84 85
85/** 86/**
86 * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it 87 * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
88 * it
87 * @gw_node: gateway node to free 89 * @gw_node: gateway node to free
88 */ 90 */
89void batadv_gw_node_put(struct batadv_gw_node *gw_node) 91void batadv_gw_node_put(struct batadv_gw_node *gw_node)
@@ -91,6 +93,12 @@ void batadv_gw_node_put(struct batadv_gw_node *gw_node)
91 kref_put(&gw_node->refcount, batadv_gw_node_release); 93 kref_put(&gw_node->refcount, batadv_gw_node_release);
92} 94}
93 95
96/**
97 * batadv_gw_get_selected_gw_node() - Get currently selected gateway
98 * @bat_priv: the bat priv with all the soft interface information
99 *
100 * Return: selected gateway (with increased refcnt), NULL on errors
101 */
94struct batadv_gw_node * 102struct batadv_gw_node *
95batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) 103batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
96{ 104{
@@ -109,6 +117,12 @@ out:
109 return gw_node; 117 return gw_node;
110} 118}
111 119
120/**
121 * batadv_gw_get_selected_orig() - Get originator of currently selected gateway
122 * @bat_priv: the bat priv with all the soft interface information
123 *
124 * Return: orig_node of selected gateway (with increased refcnt), NULL on errors
125 */
112struct batadv_orig_node * 126struct batadv_orig_node *
113batadv_gw_get_selected_orig(struct batadv_priv *bat_priv) 127batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
114{ 128{
@@ -155,7 +169,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
155} 169}
156 170
157/** 171/**
158 * batadv_gw_reselect - force a gateway reselection 172 * batadv_gw_reselect() - force a gateway reselection
159 * @bat_priv: the bat priv with all the soft interface information 173 * @bat_priv: the bat priv with all the soft interface information
160 * 174 *
161 * Set a flag to remind the GW component to perform a new gateway reselection. 175 * Set a flag to remind the GW component to perform a new gateway reselection.
@@ -171,7 +185,7 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv)
171} 185}
172 186
173/** 187/**
174 * batadv_gw_check_client_stop - check if client mode has been switched off 188 * batadv_gw_check_client_stop() - check if client mode has been switched off
175 * @bat_priv: the bat priv with all the soft interface information 189 * @bat_priv: the bat priv with all the soft interface information
176 * 190 *
177 * This function assumes the caller has checked that the gw state *is actually 191 * This function assumes the caller has checked that the gw state *is actually
@@ -202,6 +216,10 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
202 batadv_gw_node_put(curr_gw); 216 batadv_gw_node_put(curr_gw);
203} 217}
204 218
219/**
220 * batadv_gw_election() - Elect the best gateway
221 * @bat_priv: the bat priv with all the soft interface information
222 */
205void batadv_gw_election(struct batadv_priv *bat_priv) 223void batadv_gw_election(struct batadv_priv *bat_priv)
206{ 224{
207 struct batadv_gw_node *curr_gw = NULL; 225 struct batadv_gw_node *curr_gw = NULL;
@@ -290,6 +308,11 @@ out:
290 batadv_neigh_ifinfo_put(router_ifinfo); 308 batadv_neigh_ifinfo_put(router_ifinfo);
291} 309}
292 310
311/**
312 * batadv_gw_check_election() - Elect orig node as best gateway when eligible
313 * @bat_priv: the bat priv with all the soft interface information
314 * @orig_node: orig node which is to be checked
315 */
293void batadv_gw_check_election(struct batadv_priv *bat_priv, 316void batadv_gw_check_election(struct batadv_priv *bat_priv,
294 struct batadv_orig_node *orig_node) 317 struct batadv_orig_node *orig_node)
295{ 318{
@@ -321,7 +344,7 @@ out:
321} 344}
322 345
323/** 346/**
324 * batadv_gw_node_add - add gateway node to list of available gateways 347 * batadv_gw_node_add() - add gateway node to list of available gateways
325 * @bat_priv: the bat priv with all the soft interface information 348 * @bat_priv: the bat priv with all the soft interface information
326 * @orig_node: originator announcing gateway capabilities 349 * @orig_node: originator announcing gateway capabilities
327 * @gateway: announced bandwidth information 350 * @gateway: announced bandwidth information
@@ -364,7 +387,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
364} 387}
365 388
366/** 389/**
367 * batadv_gw_node_get - retrieve gateway node from list of available gateways 390 * batadv_gw_node_get() - retrieve gateway node from list of available gateways
368 * @bat_priv: the bat priv with all the soft interface information 391 * @bat_priv: the bat priv with all the soft interface information
369 * @orig_node: originator announcing gateway capabilities 392 * @orig_node: originator announcing gateway capabilities
370 * 393 *
@@ -393,7 +416,7 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
393} 416}
394 417
395/** 418/**
396 * batadv_gw_node_update - update list of available gateways with changed 419 * batadv_gw_node_update() - update list of available gateways with changed
397 * bandwidth information 420 * bandwidth information
398 * @bat_priv: the bat priv with all the soft interface information 421 * @bat_priv: the bat priv with all the soft interface information
399 * @orig_node: originator announcing gateway capabilities 422 * @orig_node: originator announcing gateway capabilities
@@ -458,6 +481,11 @@ out:
458 batadv_gw_node_put(gw_node); 481 batadv_gw_node_put(gw_node);
459} 482}
460 483
484/**
485 * batadv_gw_node_delete() - Remove orig_node from gateway list
486 * @bat_priv: the bat priv with all the soft interface information
487 * @orig_node: orig node which is currently in the process of being removed
488 */
461void batadv_gw_node_delete(struct batadv_priv *bat_priv, 489void batadv_gw_node_delete(struct batadv_priv *bat_priv,
462 struct batadv_orig_node *orig_node) 490 struct batadv_orig_node *orig_node)
463{ 491{
@@ -469,6 +497,10 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
469 batadv_gw_node_update(bat_priv, orig_node, &gateway); 497 batadv_gw_node_update(bat_priv, orig_node, &gateway);
470} 498}
471 499
500/**
501 * batadv_gw_node_free() - Free gateway information from soft interface
502 * @bat_priv: the bat priv with all the soft interface information
503 */
472void batadv_gw_node_free(struct batadv_priv *bat_priv) 504void batadv_gw_node_free(struct batadv_priv *bat_priv)
473{ 505{
474 struct batadv_gw_node *gw_node; 506 struct batadv_gw_node *gw_node;
@@ -484,6 +516,14 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
484} 516}
485 517
486#ifdef CONFIG_BATMAN_ADV_DEBUGFS 518#ifdef CONFIG_BATMAN_ADV_DEBUGFS
519
520/**
521 * batadv_gw_client_seq_print_text() - Print the gateway table in a seq file
522 * @seq: seq file to print on
523 * @offset: not used
524 *
525 * Return: always 0
526 */
487int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) 527int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
488{ 528{
489 struct net_device *net_dev = (struct net_device *)seq->private; 529 struct net_device *net_dev = (struct net_device *)seq->private;
@@ -514,7 +554,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
514#endif 554#endif
515 555
516/** 556/**
517 * batadv_gw_dump - Dump gateways into a message 557 * batadv_gw_dump() - Dump gateways into a message
518 * @msg: Netlink message to dump into 558 * @msg: Netlink message to dump into
519 * @cb: Control block containing additional options 559 * @cb: Control block containing additional options
520 * 560 *
@@ -567,7 +607,7 @@ out:
567} 607}
568 608
569/** 609/**
570 * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message 610 * batadv_gw_dhcp_recipient_get() - check if a packet is a DHCP message
571 * @skb: the packet to check 611 * @skb: the packet to check
572 * @header_len: a pointer to the batman-adv header size 612 * @header_len: a pointer to the batman-adv header size
573 * @chaddr: buffer where the client address will be stored. Valid 613 * @chaddr: buffer where the client address will be stored. Valid
@@ -686,7 +726,8 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
686} 726}
687 727
688/** 728/**
689 * batadv_gw_out_of_range - check if the dhcp request destination is the best gw 729 * batadv_gw_out_of_range() - check if the dhcp request destination is the best
730 * gateway
690 * @bat_priv: the bat priv with all the soft interface information 731 * @bat_priv: the bat priv with all the soft interface information
691 * @skb: the outgoing packet 732 * @skb: the outgoing packet
692 * 733 *
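
The release/put pair documented at the top of this file (batadv_gw_node_release() / batadv_gw_node_put()) is the standard kernel kref pattern: the last put triggers the release callback, which defers freeing past an RCU grace period. A generic sketch of the pattern; all names here are illustrative, not batman-adv's:

    #include <linux/kref.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct sketch_node {
            struct kref refcount;
            struct rcu_head rcu;
    };

    static void sketch_node_release(struct kref *ref)
    {
            struct sketch_node *node = container_of(ref, struct sketch_node,
                                                    refcount);

            /* Defer kfree() past an RCU grace period so lockless readers
             * still holding rcu_read_lock() cannot see freed memory.
             */
            kfree_rcu(node, rcu);
    }

    static void sketch_node_put(struct sketch_node *node)
    {
            kref_put(&node->refcount, sketch_node_release);
    }
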
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 3baa3d466e5e..981f58421a32 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 2c26039c23fc..b3e156af2256 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
@@ -26,15 +27,15 @@
26#include <linux/netdevice.h> 27#include <linux/netdevice.h>
27#include <linux/stddef.h> 28#include <linux/stddef.h>
28#include <linux/string.h> 29#include <linux/string.h>
30#include <uapi/linux/batadv_packet.h>
29 31
30#include "gateway_client.h" 32#include "gateway_client.h"
31#include "log.h" 33#include "log.h"
32#include "packet.h"
33#include "tvlv.h" 34#include "tvlv.h"
34 35
35/** 36/**
36 * batadv_parse_throughput - parse supplied string buffer to extract throughput 37 * batadv_parse_throughput() - parse supplied string buffer to extract
37 * information 38 * throughput information
38 * @net_dev: the soft interface net device 39 * @net_dev: the soft interface net device
39 * @buff: string buffer to parse 40 * @buff: string buffer to parse
40 * @description: text shown when throughput string cannot be parsed 41 * @description: text shown when throughput string cannot be parsed
@@ -100,8 +101,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
100} 101}
101 102
102/** 103/**
103 * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download 104 * batadv_parse_gw_bandwidth() - parse supplied string buffer to extract
104 * and upload bandwidth information 105 * download and upload bandwidth information
105 * @net_dev: the soft interface net device 106 * @net_dev: the soft interface net device
106 * @buff: string buffer to parse 107 * @buff: string buffer to parse
107 * @down: pointer holding the returned download bandwidth information 108 * @down: pointer holding the returned download bandwidth information
@@ -136,8 +137,8 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
136} 137}
137 138
138/** 139/**
139 * batadv_gw_tvlv_container_update - update the gw tvlv container after gateway 140 * batadv_gw_tvlv_container_update() - update the gw tvlv container after
140 * setting change 141 * gateway setting change
141 * @bat_priv: the bat priv with all the soft interface information 142 * @bat_priv: the bat priv with all the soft interface information
142 */ 143 */
143void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) 144void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
@@ -164,6 +165,15 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
164 } 165 }
165} 166}
166 167
168/**
169 * batadv_gw_bandwidth_set() - Parse and set download/upload gateway bandwidth
170 * from supplied string buffer
171 * @net_dev: netdev struct of the soft interface
172 * @buff: the buffer containing the user data
173 * @count: number of bytes in the buffer
174 *
175 * Return: 'count' on success or a negative error code in case of failure
176 */
167ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, 177ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
168 size_t count) 178 size_t count)
169{ 179{
@@ -207,7 +217,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
207} 217}
208 218
209/** 219/**
210 * batadv_gw_tvlv_ogm_handler_v1 - process incoming gateway tvlv container 220 * batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container
211 * @bat_priv: the bat priv with all the soft interface information 221 * @bat_priv: the bat priv with all the soft interface information
212 * @orig: the orig_node of the ogm 222 * @orig: the orig_node of the ogm
213 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) 223 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -248,7 +258,7 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
248} 258}
249 259
250/** 260/**
251 * batadv_gw_init - initialise the gateway handling internals 261 * batadv_gw_init() - initialise the gateway handling internals
252 * @bat_priv: the bat priv with all the soft interface information 262 * @bat_priv: the bat priv with all the soft interface information
253 */ 263 */
254void batadv_gw_init(struct batadv_priv *bat_priv) 264void batadv_gw_init(struct batadv_priv *bat_priv)
@@ -264,7 +274,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv)
264} 274}
265 275
266/** 276/**
267 * batadv_gw_free - free the gateway handling internals 277 * batadv_gw_free() - free the gateway handling internals
268 * @bat_priv: the bat priv with all the soft interface information 278 * @bat_priv: the bat priv with all the soft interface information
269 */ 279 */
270void batadv_gw_free(struct batadv_priv *bat_priv) 280void batadv_gw_free(struct batadv_priv *bat_priv)
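
batadv_gw_bandwidth_set() and batadv_parse_gw_bandwidth() together parse a "<download>/<upload>" string written via sysfs (e.g. "10mbit/2mbit"). A simplified sketch of the split-and-parse step; unit-suffix handling for kbit/mbit is elided and all names are illustrative:

    #include <linux/kernel.h>
    #include <linux/string.h>

    static bool sketch_parse_throughput(char *buff, u32 *tp)
    {
            /* the real parser also accepts "kbit"/"mbit" suffixes */
            return kstrtou32(strim(buff), 10, tp) == 0;
    }

    static bool sketch_parse_gw_bandwidth(char *buff, u32 *down, u32 *up)
    {
            char *slash_ptr = strchr(buff, '/');

            if (slash_ptr)
                    *slash_ptr = '\0';      /* terminate the download half */

            if (!sketch_parse_throughput(buff, down))
                    return false;

            if (!slash_ptr)
                    return true;    /* no upload half: keep caller's default */

            return sketch_parse_throughput(slash_ptr + 1, up);
    }
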
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 0a6a97d201f2..afebd9c7edf4 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
@@ -32,11 +33,12 @@ enum batadv_gw_modes {
32 33
33/** 34/**
34 * enum batadv_bandwidth_units - bandwidth unit types 35 * enum batadv_bandwidth_units - bandwidth unit types
35 * @BATADV_BW_UNIT_KBIT: unit type kbit
36 * @BATADV_BW_UNIT_MBIT: unit type mbit
37 */ 36 */
38enum batadv_bandwidth_units { 37enum batadv_bandwidth_units {
38 /** @BATADV_BW_UNIT_KBIT: unit type kbit */
39 BATADV_BW_UNIT_KBIT, 39 BATADV_BW_UNIT_KBIT,
40
41 /** @BATADV_BW_UNIT_MBIT: unit type mbit */
40 BATADV_BW_UNIT_MBIT, 42 BATADV_BW_UNIT_MBIT,
41}; 43};
42 44
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4e3d5340ad96..5f186bff284a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -22,7 +23,7 @@
22#include <linux/bug.h> 23#include <linux/bug.h>
23#include <linux/byteorder/generic.h> 24#include <linux/byteorder/generic.h>
24#include <linux/errno.h> 25#include <linux/errno.h>
25#include <linux/fs.h> 26#include <linux/gfp.h>
26#include <linux/if.h> 27#include <linux/if.h>
27#include <linux/if_arp.h> 28#include <linux/if_arp.h>
28#include <linux/if_ether.h> 29#include <linux/if_ether.h>
@@ -37,6 +38,7 @@
37#include <linux/spinlock.h> 38#include <linux/spinlock.h>
38#include <net/net_namespace.h> 39#include <net/net_namespace.h>
39#include <net/rtnetlink.h> 40#include <net/rtnetlink.h>
41#include <uapi/linux/batadv_packet.h>
40 42
41#include "bat_v.h" 43#include "bat_v.h"
42#include "bridge_loop_avoidance.h" 44#include "bridge_loop_avoidance.h"
@@ -45,14 +47,13 @@
45#include "gateway_client.h" 47#include "gateway_client.h"
46#include "log.h" 48#include "log.h"
47#include "originator.h" 49#include "originator.h"
48#include "packet.h"
49#include "send.h" 50#include "send.h"
50#include "soft-interface.h" 51#include "soft-interface.h"
51#include "sysfs.h" 52#include "sysfs.h"
52#include "translation-table.h" 53#include "translation-table.h"
53 54
54/** 55/**
55 * batadv_hardif_release - release hard interface from lists and queue for 56 * batadv_hardif_release() - release hard interface from lists and queue for
56 * free after rcu grace period 57 * free after rcu grace period
57 * @ref: kref pointer of the hard interface 58 * @ref: kref pointer of the hard interface
58 */ 59 */
@@ -66,6 +67,12 @@ void batadv_hardif_release(struct kref *ref)
66 kfree_rcu(hard_iface, rcu); 67 kfree_rcu(hard_iface, rcu);
67} 68}
68 69
70/**
71 * batadv_hardif_get_by_netdev() - Get hard interface object of a net_device
72 * @net_dev: net_device to search for
73 *
74 * Return: batadv_hard_iface of net_dev (with increased refcnt), NULL on errors
75 */
69struct batadv_hard_iface * 76struct batadv_hard_iface *
70batadv_hardif_get_by_netdev(const struct net_device *net_dev) 77batadv_hardif_get_by_netdev(const struct net_device *net_dev)
71{ 78{
@@ -86,7 +93,7 @@ out:
86} 93}
87 94
88/** 95/**
89 * batadv_getlink_net - return link net namespace (or use fallback) 96 * batadv_getlink_net() - return link net namespace (or use fallback)
90 * @netdev: net_device to check 97 * @netdev: net_device to check
91 * @fallback_net: returned in case get_link_net is not available for @netdev 98
92 * 99 *
@@ -105,7 +112,7 @@ static struct net *batadv_getlink_net(const struct net_device *netdev,
105} 112}
106 113
107/** 114/**
108 * batadv_mutual_parents - check if two devices are each other's parent 115 * batadv_mutual_parents() - check if two devices are each other's parent
109 * @dev1: 1st net dev 116 * @dev1: 1st net dev
110 * @net1: 1st device's netns 117
111 * @dev2: 2nd net dev 118 * @dev2: 2nd net dev
@@ -138,7 +145,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
138} 145}
139 146
140/** 147/**
141 * batadv_is_on_batman_iface - check if a device is a batman iface descendant 148 * batadv_is_on_batman_iface() - check if a device is a batman iface descendant
142 * @net_dev: the device to check 149 * @net_dev: the device to check
143 * 150 *
144 * If the user creates any virtual device on top of a batman-adv interface, it 151 * If the user creates any virtual device on top of a batman-adv interface, it
@@ -202,7 +209,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
202} 209}
203 210
204/** 211/**
205 * batadv_get_real_netdevice - check if the given netdev struct is a virtual 212 * batadv_get_real_netdevice() - check if the given netdev struct is a virtual
206 * interface on top of another 'real' interface 213 * interface on top of another 'real' interface
207 * @netdev: the device to check 214 * @netdev: the device to check
208 * 215 *
@@ -246,7 +253,7 @@ out:
246} 253}
247 254
248/** 255/**
249 * batadv_get_real_netdev - check if the given net_device struct is a virtual 256 * batadv_get_real_netdev() - check if the given net_device struct is a virtual
250 * interface on top of another 'real' interface 257 * interface on top of another 'real' interface
251 * @net_device: the device to check 258 * @net_device: the device to check
252 * 259 *
@@ -265,7 +272,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device)
265} 272}
266 273
267/** 274/**
268 * batadv_is_wext_netdev - check if the given net_device struct is a 275 * batadv_is_wext_netdev() - check if the given net_device struct is a
269 * wext wifi interface 276 * wext wifi interface
270 * @net_device: the device to check 277 * @net_device: the device to check
271 * 278 *
@@ -289,7 +296,7 @@ static bool batadv_is_wext_netdev(struct net_device *net_device)
289} 296}
290 297
291/** 298/**
292 * batadv_is_cfg80211_netdev - check if the given net_device struct is a 299 * batadv_is_cfg80211_netdev() - check if the given net_device struct is a
293 * cfg80211 wifi interface 300 * cfg80211 wifi interface
294 * @net_device: the device to check 301 * @net_device: the device to check
295 * 302 *
@@ -309,7 +316,7 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
309} 316}
310 317
311/** 318/**
312 * batadv_wifi_flags_evaluate - calculate wifi flags for net_device 319 * batadv_wifi_flags_evaluate() - calculate wifi flags for net_device
313 * @net_device: the device to check 320 * @net_device: the device to check
314 * 321 *
315 * Return: batadv_hard_iface_wifi_flags flags of the device 322 * Return: batadv_hard_iface_wifi_flags flags of the device
@@ -344,7 +351,7 @@ out:
344} 351}
345 352
346/** 353/**
347 * batadv_is_cfg80211_hardif - check if the given hardif is a cfg80211 wifi 354 * batadv_is_cfg80211_hardif() - check if the given hardif is a cfg80211 wifi
348 * interface 355 * interface
349 * @hard_iface: the device to check 356 * @hard_iface: the device to check
350 * 357 *
@@ -362,7 +369,7 @@ bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface)
362} 369}
363 370
364/** 371/**
365 * batadv_is_wifi_hardif - check if the given hardif is a wifi interface 372 * batadv_is_wifi_hardif() - check if the given hardif is a wifi interface
366 * @hard_iface: the device to check 373 * @hard_iface: the device to check
367 * 374 *
368 * Return: true if the net device is a 802.11 wireless device, false otherwise. 375 * Return: true if the net device is a 802.11 wireless device, false otherwise.
@@ -376,7 +383,7 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface)
376} 383}
377 384
378/** 385/**
379 * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary 386 * batadv_hardif_no_broadcast() - check whether (re)broadcast is necessary
380 * @if_outgoing: the outgoing interface checked and considered for (re)broadcast 387 * @if_outgoing: the outgoing interface checked and considered for (re)broadcast
381 * @orig_addr: the originator of this packet 388 * @orig_addr: the originator of this packet
382 * @orig_neigh: originator address of the forwarder we just got the packet from 389 * @orig_neigh: originator address of the forwarder we just got the packet from
@@ -560,6 +567,13 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface)
560 soft_iface->needed_tailroom = lower_tailroom; 567 soft_iface->needed_tailroom = lower_tailroom;
561} 568}
562 569
570/**
571 * batadv_hardif_min_mtu() - Calculate maximum MTU for soft interface
572 * @soft_iface: netdev struct of the soft interface
573 *
574 * Return: MTU for the soft-interface (limited by the minimal MTU of all active
575 * slave interfaces)
576 */
563int batadv_hardif_min_mtu(struct net_device *soft_iface) 577int batadv_hardif_min_mtu(struct net_device *soft_iface)
564{ 578{
565 struct batadv_priv *bat_priv = netdev_priv(soft_iface); 579 struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -606,7 +620,11 @@ out:
606 return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); 620 return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN);
607} 621}
608 622
609/* adjusts the MTU if a new interface with a smaller MTU appeared. */ 623/**
624 * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller
625 * MTU appeared
626 * @soft_iface: netdev struct of the soft interface
627 */
610void batadv_update_min_mtu(struct net_device *soft_iface) 628void batadv_update_min_mtu(struct net_device *soft_iface)
611{ 629{
612 soft_iface->mtu = batadv_hardif_min_mtu(soft_iface); 630 soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
@@ -667,7 +685,7 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
667} 685}
668 686
669/** 687/**
670 * batadv_master_del_slave - remove hard_iface from the current master interface 688 * batadv_master_del_slave() - remove hard_iface from the current master iface
671 * @slave: the interface enslaved in another master 689 * @slave: the interface enslaved in another master
672 * @master: the master from which slave has to be removed 690 * @master: the master from which slave has to be removed
673 * 691 *
@@ -691,6 +709,14 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave,
691 return ret; 709 return ret;
692} 710}
693 711
712/**
713 * batadv_hardif_enable_interface() - Enslave hard interface to soft interface
714 * @hard_iface: hard interface to add to soft interface
715 * @net: the applicable net namespace
716 * @iface_name: name of the soft interface
717 *
718 * Return: 0 on success or negative error number in case of failure
719 */
694int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, 720int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
695 struct net *net, const char *iface_name) 721 struct net *net, const char *iface_name)
696{ 722{
@@ -802,6 +828,12 @@ err:
802 return ret; 828 return ret;
803} 829}
804 830
831/**
832 * batadv_hardif_disable_interface() - Remove hard interface from soft interface
833 * @hard_iface: hard interface to be removed
834 * @autodel: whether to delete soft interface when it doesn't contain any other
835 * slave interfaces
836 */
805void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, 837void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
806 enum batadv_hard_if_cleanup autodel) 838 enum batadv_hard_if_cleanup autodel)
807{ 839{
@@ -936,6 +968,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
936 batadv_hardif_put(hard_iface); 968 batadv_hardif_put(hard_iface);
937} 969}
938 970
971/**
972 * batadv_hardif_remove_interfaces() - Remove all hard interfaces
973 */
939void batadv_hardif_remove_interfaces(void) 974void batadv_hardif_remove_interfaces(void)
940{ 975{
941 struct batadv_hard_iface *hard_iface, *hard_iface_tmp; 976 struct batadv_hard_iface *hard_iface, *hard_iface_tmp;
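
batadv_hardif_min_mtu() reduces to: take the smallest MTU among the active slave interfaces, subtract the batman-adv encapsulation overhead, and never exceed the classic Ethernet payload. A sketch of that computation over a plain array; list walking and locking are elided, and hdr_len stands in for batadv_max_header_len():

    #include <linux/if_ether.h>
    #include <linux/kernel.h>

    static int sketch_min_mtu(const int *slave_mtus, int n, int hdr_len)
    {
            int i, min_mtu = INT_MAX;

            for (i = 0; i < n; i++)
                    min_mtu = min(min_mtu, slave_mtus[i]);

            /* leave room for the batman-adv headers, cap at 1500 bytes */
            return min_t(int, min_mtu - hdr_len, ETH_DATA_LEN);
    }
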
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 9f9890ff7a22..de5e9a374ece 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -30,36 +31,74 @@
30struct net_device; 31struct net_device;
31struct net; 32struct net;
32 33
34/**
35 * enum batadv_hard_if_state - State of a hard interface
36 */
33enum batadv_hard_if_state { 37enum batadv_hard_if_state {
38 /**
39 * @BATADV_IF_NOT_IN_USE: interface is not used as slave interface of a
40 * batman-adv soft interface
41 */
34 BATADV_IF_NOT_IN_USE, 42 BATADV_IF_NOT_IN_USE,
43
44 /**
45 * @BATADV_IF_TO_BE_REMOVED: interface will be removed from soft
46 * interface
47 */
35 BATADV_IF_TO_BE_REMOVED, 48 BATADV_IF_TO_BE_REMOVED,
49
50 /** @BATADV_IF_INACTIVE: interface is deactivated */
36 BATADV_IF_INACTIVE, 51 BATADV_IF_INACTIVE,
52
53 /** @BATADV_IF_ACTIVE: interface is used */
37 BATADV_IF_ACTIVE, 54 BATADV_IF_ACTIVE,
55
56 /** @BATADV_IF_TO_BE_ACTIVATED: interface is getting activated */
38 BATADV_IF_TO_BE_ACTIVATED, 57 BATADV_IF_TO_BE_ACTIVATED,
58
59 /**
60 * @BATADV_IF_I_WANT_YOU: interface is queued up (using sysfs) for being
61 * added as slave interface of a batman-adv soft interface
62 */
39 BATADV_IF_I_WANT_YOU, 63 BATADV_IF_I_WANT_YOU,
40}; 64};
41 65
42/** 66/**
43 * enum batadv_hard_if_bcast - broadcast avoidance options 67 * enum batadv_hard_if_bcast - broadcast avoidance options
44 * @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface
45 * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient
46 * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from
47 * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator
48 */ 68 */
49enum batadv_hard_if_bcast { 69enum batadv_hard_if_bcast {
70 /** @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface */
50 BATADV_HARDIF_BCAST_OK = 0, 71 BATADV_HARDIF_BCAST_OK = 0,
72
73 /**
74 * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no
75 * recipient
76 */
51 BATADV_HARDIF_BCAST_NORECIPIENT, 77 BATADV_HARDIF_BCAST_NORECIPIENT,
78
79 /**
80 * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it
81 * from
82 */
52 BATADV_HARDIF_BCAST_DUPFWD, 83 BATADV_HARDIF_BCAST_DUPFWD,
84
85 /** @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator */
53 BATADV_HARDIF_BCAST_DUPORIG, 86 BATADV_HARDIF_BCAST_DUPORIG,
54}; 87};
55 88
56/** 89/**
57 * enum batadv_hard_if_cleanup - Cleanup modes for soft_iface after slave removal 90
58 * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
59 * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
60 */ 91 */
61enum batadv_hard_if_cleanup { 92enum batadv_hard_if_cleanup {
93 /**
94 * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
95 */
62 BATADV_IF_CLEANUP_KEEP, 96 BATADV_IF_CLEANUP_KEEP,
97
98 /**
99 * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was
100 * removed
101 */
63 BATADV_IF_CLEANUP_AUTO, 102 BATADV_IF_CLEANUP_AUTO,
64}; 103};
65 104
@@ -82,7 +121,7 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
82 u8 *orig_addr, u8 *orig_neigh); 121 u8 *orig_addr, u8 *orig_neigh);
83 122
84/** 123/**
85 * batadv_hardif_put - decrement the hard interface refcounter and possibly 124 * batadv_hardif_put() - decrement the hard interface refcounter and possibly
86 * release it 125 * release it
87 * @hard_iface: the hard interface to free 126 * @hard_iface: the hard interface to free
88 */ 127 */
@@ -91,6 +130,12 @@ static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
91 kref_put(&hard_iface->refcount, batadv_hardif_release); 130 kref_put(&hard_iface->refcount, batadv_hardif_release);
92} 131}
93 132
133/**
134 * batadv_primary_if_get_selected() - Get reference to primary interface
135 * @bat_priv: the bat priv with all the soft interface information
136 *
137 * Return: primary interface (with increased refcnt), otherwise NULL
138 */
94static inline struct batadv_hard_iface * 139static inline struct batadv_hard_iface *
95batadv_primary_if_get_selected(struct batadv_priv *bat_priv) 140batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
96{ 141{
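
batadv_primary_if_get_selected() is an instance of the common RCU getter pattern: dereference the protected pointer under rcu_read_lock() and only return it if a reference can still be taken. A generic sketch; structure and field names are illustrative:

    #include <linux/kref.h>
    #include <linux/rcupdate.h>

    struct sketch_iface {
            struct kref refcount;
    };

    struct sketch_priv {
            struct sketch_iface __rcu *primary_if;
    };

    static struct sketch_iface *sketch_get_selected(struct sketch_priv *priv)
    {
            struct sketch_iface *iface;

            rcu_read_lock();
            iface = rcu_dereference(priv->primary_if);
            if (iface && !kref_get_unless_zero(&iface->refcount))
                    iface = NULL;   /* lost the race against release */
            rcu_read_unlock();

            return iface;
    }
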
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index b5f7e13918ac..04d964358c98 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
@@ -18,7 +19,7 @@
18#include "hash.h" 19#include "hash.h"
19#include "main.h" 20#include "main.h"
20 21
21#include <linux/fs.h> 22#include <linux/gfp.h>
22#include <linux/lockdep.h> 23#include <linux/lockdep.h>
23#include <linux/slab.h> 24#include <linux/slab.h>
24 25
@@ -33,7 +34,10 @@ static void batadv_hash_init(struct batadv_hashtable *hash)
33 } 34 }
34} 35}
35 36
36/* free only the hashtable and the hash itself. */ 37/**
38 * batadv_hash_destroy() - Free only the hashtable and the hash itself
39 * @hash: hash object to destroy
40 */
37void batadv_hash_destroy(struct batadv_hashtable *hash) 41void batadv_hash_destroy(struct batadv_hashtable *hash)
38{ 42{
39 kfree(hash->list_locks); 43 kfree(hash->list_locks);
@@ -41,7 +45,12 @@ void batadv_hash_destroy(struct batadv_hashtable *hash)
41 kfree(hash); 45 kfree(hash);
42} 46}
43 47
44/* allocates and clears the hash */ 48/**
49 * batadv_hash_new() - Allocates and clears the hashtable
50 * @size: number of hash buckets to allocate
51 *
52 * Return: newly allocated hashtable, NULL on errors
53 */
45struct batadv_hashtable *batadv_hash_new(u32 size) 54struct batadv_hashtable *batadv_hash_new(u32 size)
46{ 55{
47 struct batadv_hashtable *hash; 56 struct batadv_hashtable *hash;
@@ -70,6 +79,11 @@ free_hash:
70 return NULL; 79 return NULL;
71} 80}
72 81
82/**
83 * batadv_hash_set_lock_class() - Set specific lockdep class for hash spinlocks
84 * @hash: hash object to modify
85 * @key: lockdep class key address
86 */
73void batadv_hash_set_lock_class(struct batadv_hashtable *hash, 87void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
74 struct lock_class_key *key) 88 struct lock_class_key *key)
75{ 89{
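
Typical lifecycle of the three hash functions documented above, as a usage sketch; the bucket count is arbitrary:

    #include <linux/lockdep.h>

    static struct lock_class_key sketch_hash_lock_class;

    static struct batadv_hashtable *sketch_hash_setup(void)
    {
            struct batadv_hashtable *hash;

            hash = batadv_hash_new(1024);   /* 1024 buckets, arbitrary */
            if (!hash)
                    return NULL;

            /* own lockdep class: avoids false positives when two hash
             * tables are locked in a nested fashion
             */
            batadv_hash_set_lock_class(hash, &sketch_hash_lock_class);
            return hash;
    }

Teardown is batadv_hash_destroy(hash), which per the kernel-doc frees only the buckets, the locks and the wrapper, never the stored entries.
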
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 0c905e91c5e2..4ce1b6d3ad5c 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
@@ -45,10 +46,18 @@ typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *,
45typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); 46typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
46typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); 47typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);
47 48
49/**
50 * struct batadv_hashtable - Wrapper of simple hlist based hashtable
51 */
48struct batadv_hashtable { 52struct batadv_hashtable {
49 struct hlist_head *table; /* the hashtable itself with the buckets */ 53 /** @table: the hashtable itself with the buckets */
50 spinlock_t *list_locks; /* spinlock for each hash list entry */ 54 struct hlist_head *table;
51 u32 size; /* size of hashtable */ 55
56 /** @list_locks: spinlock for each hash list entry */
57 spinlock_t *list_locks;
58
59 /** @size: size of hashtable */
60 u32 size;
52}; 61};
53 62
54/* allocates and clears the hash */ 63/* allocates and clears the hash */
@@ -62,7 +71,7 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
62void batadv_hash_destroy(struct batadv_hashtable *hash); 71void batadv_hash_destroy(struct batadv_hashtable *hash);
63 72
64/** 73/**
65 * batadv_hash_add - adds data to the hashtable 74 * batadv_hash_add() - adds data to the hashtable
66 * @hash: storage hash table 75 * @hash: storage hash table
67 * @compare: callback to determine if 2 hash elements are identical 76 * @compare: callback to determine if 2 hash elements are identical
68 * @choose: callback calculating the hash index 77 * @choose: callback calculating the hash index
@@ -112,8 +121,15 @@ out:
112 return ret; 121 return ret;
113} 122}
114 123
115/* removes data from hash, if found. data could be the structure you use with 124/**
116 * just the key filled, we just need the key for comparing. 125 * batadv_hash_remove() - Removes data from hash, if found
126 * @hash: hash table
127 * @compare: callback to determine if 2 hash elements are identical
128 * @choose: callback calculating the hash index
129 * @data: data passed to the aforementioned callbacks as argument
130 *
131 * Data could be the structure you use with just the key filled; we just need
132 * the key for comparing.
117 * 133 *
118 * Return: returns pointer to data on success, so you can remove the used 134
119 * structure yourself, or NULL on error 135 * structure yourself, or NULL on error
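
The compare/choose callback types and batadv_hash_add()/batadv_hash_remove() documented above compose like this; a sketch with a MAC-style key, where the toy bucket function and entry layout are illustrative:

    #include <linux/list.h>
    #include <linux/string.h>

    struct sketch_entry {
            struct hlist_node node;
            u8 key[6];      /* e.g. a MAC address */
    };

    static bool sketch_compare(const struct hlist_node *n, const void *key)
    {
            const struct sketch_entry *e = container_of(n, struct sketch_entry,
                                                        node);

            return memcmp(e->key, key, sizeof(e->key)) == 0;
    }

    static u32 sketch_choose(const void *key, u32 size)
    {
            /* toy bucket selection; real callbacks use a proper hash */
            return ((const u8 *)key)[5] % size;
    }

Insertion then passes both callbacks plus the key and node, roughly batadv_hash_add(hash, sketch_compare, sketch_choose, entry->key, &entry->node); removal hands the same callbacks to batadv_hash_remove().
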
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bded31121d12..e91f29c7c638 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
@@ -26,6 +27,7 @@
26#include <linux/export.h> 27#include <linux/export.h>
27#include <linux/fcntl.h> 28#include <linux/fcntl.h>
28#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/gfp.h>
29#include <linux/if_ether.h> 31#include <linux/if_ether.h>
30#include <linux/kernel.h> 32#include <linux/kernel.h>
31#include <linux/list.h> 33#include <linux/list.h>
@@ -42,11 +44,11 @@
42#include <linux/string.h> 44#include <linux/string.h>
43#include <linux/uaccess.h> 45#include <linux/uaccess.h>
44#include <linux/wait.h> 46#include <linux/wait.h>
47#include <uapi/linux/batadv_packet.h>
45 48
46#include "hard-interface.h" 49#include "hard-interface.h"
47#include "log.h" 50#include "log.h"
48#include "originator.h" 51#include "originator.h"
49#include "packet.h"
50#include "send.h" 52#include "send.h"
51 53
52static struct batadv_socket_client *batadv_socket_client_hash[256]; 54static struct batadv_socket_client *batadv_socket_client_hash[256];
@@ -55,6 +57,9 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
55 struct batadv_icmp_header *icmph, 57 struct batadv_icmp_header *icmph,
56 size_t icmp_len); 58 size_t icmp_len);
57 59
60/**
61 * batadv_socket_init() - Initialize soft interface independent socket data
62 */
58void batadv_socket_init(void) 63void batadv_socket_init(void)
59{ 64{
60 memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash)); 65 memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash));
@@ -292,14 +297,14 @@ out:
292 return len; 297 return len;
293} 298}
294 299
295static unsigned int batadv_socket_poll(struct file *file, poll_table *wait) 300static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
296{ 301{
297 struct batadv_socket_client *socket_client = file->private_data; 302 struct batadv_socket_client *socket_client = file->private_data;
298 303
299 poll_wait(file, &socket_client->queue_wait, wait); 304 poll_wait(file, &socket_client->queue_wait, wait);
300 305
301 if (socket_client->queue_len > 0) 306 if (socket_client->queue_len > 0)
302 return POLLIN | POLLRDNORM; 307 return EPOLLIN | EPOLLRDNORM;
303 308
304 return 0; 309 return 0;
305} 310}
@@ -314,6 +319,12 @@ static const struct file_operations batadv_fops = {
314 .llseek = no_llseek, 319 .llseek = no_llseek,
315}; 320};
316 321
322/**
323 * batadv_socket_setup() - Create debugfs "socket" file
324 * @bat_priv: the bat priv with all the soft interface information
325 *
326 * Return: 0 on success or negative error number in case of failure
327 */
317int batadv_socket_setup(struct batadv_priv *bat_priv) 328int batadv_socket_setup(struct batadv_priv *bat_priv)
318{ 329{
319 struct dentry *d; 330 struct dentry *d;
@@ -333,7 +344,7 @@ err:
333} 344}
334 345
335/** 346/**
336 * batadv_socket_add_packet - schedule an icmp packet to be sent to 347 * batadv_socket_add_packet() - schedule an icmp packet to be sent to
337 * userspace on an icmp socket. 348 * userspace on an icmp socket.
338 * @socket_client: the socket this packet belongs to 349 * @socket_client: the socket this packet belongs to
339 * @icmph: pointer to the header of the icmp packet 350 * @icmph: pointer to the header of the icmp packet
@@ -390,7 +401,7 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
390} 401}
391 402
392/** 403/**
393 * batadv_socket_receive_packet - schedule an icmp packet to be received 404 * batadv_socket_receive_packet() - schedule an icmp packet to be received
394 * locally and sent to userspace. 405 * locally and sent to userspace.
395 * @icmph: pointer to the header of the icmp packet 406 * @icmph: pointer to the header of the icmp packet
396 * @icmp_len: total length of the icmp packet 407 * @icmp_len: total length of the icmp packet
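
The batadv_socket_poll() change above is part of the tree-wide switch to __poll_t; the shape of such a poll method is always the same. A generic sketch, with illustrative structure and field names:

    #include <linux/fs.h>
    #include <linux/poll.h>
    #include <linux/wait.h>

    struct sketch_client {
            wait_queue_head_t queue_wait;
            size_t queue_len;
    };

    static __poll_t sketch_poll(struct file *file, poll_table *wait)
    {
            struct sketch_client *client = file->private_data;

            /* register first, then test: avoids missing a wakeup */
            poll_wait(file, &client->queue_wait, wait);

            if (client->queue_len > 0)
                    return EPOLLIN | EPOLLRDNORM;   /* readable */

            return 0;
    }
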
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index f3fec40aae86..84cddd01eeab 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 4ef4bde2cc2d..dc9fa37ddd14 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
@@ -24,6 +25,7 @@
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -86,6 +88,13 @@ static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
 	return 0;
 }
 
+/**
+ * batadv_debug_log() - Add debug log entry
+ * @bat_priv: the bat priv with all the soft interface information
+ * @fmt: format string
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
 {
 	va_list args;
@@ -176,7 +185,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
 	return error;
 }
 
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
 {
 	struct batadv_priv *bat_priv = file->private_data;
 	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
@@ -184,7 +193,7 @@ static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
 	poll_wait(file, &debug_log->queue_wait, wait);
 
 	if (!batadv_log_empty(debug_log))
-		return POLLIN | POLLRDNORM;
+		return EPOLLIN | EPOLLRDNORM;
 
 	return 0;
 }
@@ -197,6 +206,12 @@ static const struct file_operations batadv_log_fops = {
 	.llseek = no_llseek,
 };
 
+/**
+ * batadv_debug_log_setup() - Initialize debug log
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_debug_log_setup(struct batadv_priv *bat_priv)
 {
 	struct dentry *d;
@@ -222,6 +237,10 @@ err:
 	return -ENOMEM;
 }
 
+/**
+ * batadv_debug_log_cleanup() - Destroy debug log
+ * @bat_priv: the bat priv with all the soft interface information
+ */
 void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
 {
 	kfree(bat_priv->debug_log);
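The batadv_log_poll() hunks above are part of the kernel-wide switch from returning a plain unsigned int poll mask to the sparse-checked __poll_t type with EPOLL* constants. A minimal sketch of the resulting ->poll() shape; the wait queue and ready flag are hypothetical stand-ins for the batman-adv log state, not code from this patch:

#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(example_queue_wait);
static bool example_data_ready;	/* assumed: set by a producer elsewhere */

static __poll_t example_poll(struct file *file, poll_table *wait)
{
	/* register on the wait queue; this never blocks */
	poll_wait(file, &example_queue_wait, wait);

	if (example_data_ready)
		return EPOLLIN | EPOLLRDNORM;	/* readable right now */

	return 0;	/* no events pending */
}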
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 65ce97efa6b5..35e02b2b9e72 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -44,25 +45,33 @@ static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
 
 /**
  * enum batadv_dbg_level - available log levels
- * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
- * @BATADV_DBG_ROUTES: route added / changed / deleted
- * @BATADV_DBG_TT: translation table messages
- * @BATADV_DBG_BLA: bridge loop avoidance messages
- * @BATADV_DBG_DAT: ARP snooping and DAT related messages
- * @BATADV_DBG_NC: network coding related messages
- * @BATADV_DBG_MCAST: multicast related messages
- * @BATADV_DBG_TP_METER: throughput meter messages
- * @BATADV_DBG_ALL: the union of all the above log levels
  */
 enum batadv_dbg_level {
+	/** @BATADV_DBG_BATMAN: OGM and TQ computations related messages */
 	BATADV_DBG_BATMAN = BIT(0),
+
+	/** @BATADV_DBG_ROUTES: route added / changed / deleted */
 	BATADV_DBG_ROUTES = BIT(1),
+
+	/** @BATADV_DBG_TT: translation table messages */
 	BATADV_DBG_TT = BIT(2),
+
+	/** @BATADV_DBG_BLA: bridge loop avoidance messages */
 	BATADV_DBG_BLA = BIT(3),
+
+	/** @BATADV_DBG_DAT: ARP snooping and DAT related messages */
 	BATADV_DBG_DAT = BIT(4),
+
+	/** @BATADV_DBG_NC: network coding related messages */
 	BATADV_DBG_NC = BIT(5),
+
+	/** @BATADV_DBG_MCAST: multicast related messages */
 	BATADV_DBG_MCAST = BIT(6),
+
+	/** @BATADV_DBG_TP_METER: throughput meter messages */
 	BATADV_DBG_TP_METER = BIT(7),
+
+	/** @BATADV_DBG_ALL: the union of all the above log levels */
 	BATADV_DBG_ALL = 255,
 };
 
@@ -70,7 +79,14 @@ enum batadv_dbg_level {
 int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
 __printf(2, 3);
 
-/* possibly ratelimited debug output */
+/**
+ * _batadv_dbg() - Store debug output with(out) ratelimiting
+ * @type: type of debug message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ratelimited: whether output should be rate limited
+ * @fmt: format string
+ * @arg...: variable arguments
+ */
 #define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
 	do { \
 		struct batadv_priv *__batpriv = (bat_priv); \
@@ -89,11 +105,30 @@ static inline void _batadv_dbg(int type __always_unused,
 }
 #endif
 
+/**
+ * batadv_dbg() - Store debug output without ratelimiting
+ * @type: type of debug message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @arg...: format string and variable arguments
+ */
 #define batadv_dbg(type, bat_priv, arg...) \
 	_batadv_dbg(type, bat_priv, 0, ## arg)
+
+/**
+ * batadv_dbg_ratelimited() - Store debug output with ratelimiting
+ * @type: type of debug message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @arg...: format string and variable arguments
+ */
 #define batadv_dbg_ratelimited(type, bat_priv, arg...) \
 	_batadv_dbg(type, bat_priv, 1, ## arg)
 
+/**
+ * batadv_info() - Store message in debug buffer and print it to kmsg buffer
+ * @net_dev: the soft interface net device
+ * @fmt: format string
+ * @arg...: variable arguments
+ */
 #define batadv_info(net_dev, fmt, arg...) \
 	do { \
 		struct net_device *_netdev = (net_dev); \
@@ -101,6 +136,13 @@ static inline void _batadv_dbg(int type __always_unused,
 		batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
 		pr_info("%s: " fmt, _netdev->name, ## arg); \
 	} while (0)
+
+/**
+ * batadv_err() - Store error in debug buffer and print it to kmsg buffer
+ * @net_dev: the soft interface net device
+ * @fmt: format string
+ * @arg...: variable arguments
+ */
 #define batadv_err(net_dev, fmt, arg...) \
 	do { \
 		struct net_device *_netdev = (net_dev); \
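All of the macros above funnel through _batadv_dbg(), gated by the per-interface log level bitmask from enum batadv_dbg_level. A sketch of typical call sites, assuming a valid bat_priv and neighbour address; the surrounding function is illustrative, not from the patch:

#include "log.h"

static void example_log_route_change(struct batadv_priv *bat_priv,
				     const u8 *neigh_addr)
{
	/* emitted whenever BATADV_DBG_ROUTES is set in the log level */
	batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
		   "route towards %pM changed\n", neigh_addr);

	/* same event class, but suppressed under log flooding */
	batadv_dbg_ratelimited(BATADV_DBG_ROUTES, bat_priv,
			       "route towards %pM changed\n", neigh_addr);
}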
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4daed7ad46f2..d31c8266e244 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -18,12 +19,12 @@
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/bug.h>
+#include <linux/build_bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/crc32c.h>
 #include <linux/errno.h>
-#include <linux/fs.h>
 #include <linux/genetlink.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/init.h>
@@ -45,6 +46,7 @@
 #include <linux/workqueue.h>
 #include <net/dsfield.h>
 #include <net/rtnetlink.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
@@ -62,7 +64,6 @@
 #include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
-#include "packet.h"
 #include "routing.h"
 #include "send.h"
 #include "soft-interface.h"
@@ -139,6 +140,12 @@ static void __exit batadv_exit(void)
 	batadv_tt_cache_destroy();
 }
 
+/**
+ * batadv_mesh_init() - Initialize soft interface
+ * @soft_iface: netdev struct of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_mesh_init(struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -216,6 +223,10 @@ err:
 	return ret;
 }
 
+/**
+ * batadv_mesh_free() - Deinitialize soft interface
+ * @soft_iface: netdev struct of the soft interface
+ */
 void batadv_mesh_free(struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -255,8 +266,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
 }
 
 /**
- * batadv_is_my_mac - check if the given mac address belongs to any of the real
- * interfaces in the current mesh
+ * batadv_is_my_mac() - check if the given mac address belongs to any of the
+ * real interfaces in the current mesh
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the address to check
  *
@@ -286,7 +297,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_seq_print_text_primary_if_get - called from debugfs table printing
+ * batadv_seq_print_text_primary_if_get() - called from debugfs table printing
  * function that requires the primary interface
  * @seq: debugfs table seq_file struct
  *
@@ -323,7 +334,7 @@ out:
 #endif
 
 /**
- * batadv_max_header_len - calculate maximum encapsulation overhead for a
+ * batadv_max_header_len() - calculate maximum encapsulation overhead for a
  * payload packet
  *
  * Return: the maximum encapsulation overhead in bytes.
@@ -348,7 +359,7 @@ int batadv_max_header_len(void)
 }
 
 /**
- * batadv_skb_set_priority - sets skb priority according to packet content
+ * batadv_skb_set_priority() - sets skb priority according to packet content
  * @skb: the packet to be sent
  * @offset: offset to the packet content
  *
@@ -412,6 +423,16 @@ static int batadv_recv_unhandled_packet(struct sk_buff *skb,
 /* incoming packets with the batman ethertype received on any active hard
  * interface
  */
+
+/**
+ * batadv_batman_skb_recv() - Handle incoming message from a hard interface
+ * @skb: the received packet
+ * @dev: the net device that the packet was received on
+ * @ptype: packet type of incoming packet (ETH_P_BATMAN)
+ * @orig_dev: the original receive net device (e.g. bonded device)
+ *
+ * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
+ */
 int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 			   struct packet_type *ptype,
 			   struct net_device *orig_dev)
@@ -535,6 +556,13 @@ static void batadv_recv_handler_init(void)
 	batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet;
 }
 
+/**
+ * batadv_recv_handler_register() - Register handler for batman-adv packet type
+ * @packet_type: batadv_packettype which should be handled
+ * @recv_handler: receive handler for the packet type
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int
 batadv_recv_handler_register(u8 packet_type,
 			     int (*recv_handler)(struct sk_buff *,
@@ -552,13 +580,17 @@ batadv_recv_handler_register(u8 packet_type,
 	return 0;
 }
 
+/**
+ * batadv_recv_handler_unregister() - Unregister handler for packet type
+ * @packet_type: batadv_packettype which should no longer be handled
+ */
 void batadv_recv_handler_unregister(u8 packet_type)
 {
 	batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet;
 }
 
 /**
- * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in
+ * batadv_skb_crc32() - calculate CRC32 of the whole packet and skip bytes in
  * the header
  * @skb: skb pointing to fragmented socket buffers
  * @payload_ptr: Pointer to position inside the head buffer of the skb
@@ -591,7 +623,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
 }
 
 /**
- * batadv_get_vid - extract the VLAN identifier from skb if any
+ * batadv_get_vid() - extract the VLAN identifier from skb if any
  * @skb: the buffer containing the packet
  * @header_len: length of the batman header preceding the ethernet header
  *
@@ -618,7 +650,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
 }
 
 /**
- * batadv_vlan_ap_isola_get - return the AP isolation status for the given vlan
+ * batadv_vlan_ap_isola_get() - return AP isolation status for the given vlan
  * @bat_priv: the bat priv with all the soft interface information
  * @vid: the VLAN identifier for which the AP isolation attribute has to be
  * looked up
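batadv_recv_handler_register() fills one slot of the batadv_rx_handler dispatch table and refuses to overwrite a slot that is already claimed. A hedged sketch of how a routing-algorithm module would use the pair; BATADV_MY_TYPE and the my_* names are illustrative, not batman-adv symbols:

static int my_recv_fn(struct sk_buff *skb,
		      struct batadv_hard_iface *recv_if)
{
	/* validate, consume or forward the skb here */
	return NET_RX_SUCCESS;
}

static int __init my_proto_init(void)
{
	/* claims the dispatch slot for this packet type */
	return batadv_recv_handler_register(BATADV_MY_TYPE, my_recv_fn);
}

static void __exit my_proto_exit(void)
{
	/* resets the slot back to the unhandled-packet stub */
	batadv_recv_handler_unregister(BATADV_MY_TYPE);
}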
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index edb2f239d04d..f7ba3f96d8f3 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -24,7 +25,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.4"
+#define BATADV_SOURCE_VERSION "2018.0"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -140,24 +141,56 @@
  */
 #define BATADV_TP_MAX_NUM 5
 
+/**
+ * enum batadv_mesh_state - State of a soft interface
+ */
 enum batadv_mesh_state {
+	/** @BATADV_MESH_INACTIVE: soft interface is not yet running */
 	BATADV_MESH_INACTIVE,
+
+	/** @BATADV_MESH_ACTIVE: interface is up and running */
 	BATADV_MESH_ACTIVE,
+
+	/** @BATADV_MESH_DEACTIVATING: interface is getting shut down */
 	BATADV_MESH_DEACTIVATING,
 };
 
 #define BATADV_BCAST_QUEUE_LEN 256
 #define BATADV_BATMAN_QUEUE_LEN 256
 
+/**
+ * enum batadv_uev_action - action type of uevent
+ */
 enum batadv_uev_action {
+	/** @BATADV_UEV_ADD: gateway was selected (after none was selected) */
 	BATADV_UEV_ADD = 0,
+
+	/**
+	 * @BATADV_UEV_DEL: selected gateway was removed and none is selected
+	 * anymore
+	 */
 	BATADV_UEV_DEL,
+
+	/**
+	 * @BATADV_UEV_CHANGE: a different gateway was selected as based gateway
+	 */
 	BATADV_UEV_CHANGE,
+
+	/**
+	 * @BATADV_UEV_LOOPDETECT: loop was detected which cannot be handled by
+	 * bridge loop avoidance
+	 */
	BATADV_UEV_LOOPDETECT,
 };
 
+/**
+ * enum batadv_uev_type - Type of uevent
+ */
 enum batadv_uev_type {
+	/** @BATADV_UEV_GW: selected gateway was modified */
 	BATADV_UEV_GW = 0,
+
+	/** @BATADV_UEV_BLA: bridge loop avoidance event */
 	BATADV_UEV_BLA,
 };
 
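For reference, the two enums above combine as the (type, action) pair of a kobject uevent on the soft interface. A sketch of the call, assuming a valid bat_priv; batadv_throw_uevent() is the helper main.c uses for this, and the MAC string is an example payload:

	/* announce that a different gateway was elected */
	batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE,
			    "00:11:22:33:44:55");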
@@ -184,16 +217,14 @@ enum batadv_uev_type {
 
 /* Kernel headers */
 
-#include <linux/bitops.h> /* for packet.h */
 #include <linux/compiler.h>
 #include <linux/etherdevice.h>
-#include <linux/if_ether.h> /* for packet.h */
 #include <linux/if_vlan.h>
 #include <linux/jiffies.h>
 #include <linux/percpu.h>
 #include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
 
-#include "packet.h"
 #include "types.h"
 
 struct net_device;
@@ -202,7 +233,7 @@ struct seq_file;
 struct sk_buff;
 
 /**
- * batadv_print_vid - return printable version of vid information
+ * batadv_print_vid() - return printable version of vid information
  * @vid: the VLAN identifier
  *
  * Return: -1 when no VLAN is used, VLAN id otherwise
@@ -238,7 +269,7 @@ void batadv_recv_handler_unregister(u8 packet_type);
 __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
 
 /**
- * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
+ * batadv_compare_eth() - Compare two not u16 aligned Ethernet addresses
  * @data1: Pointer to a six-byte array containing the Ethernet address
  * @data2: Pointer to another six-byte array containing the Ethernet address
  *
@@ -252,7 +283,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
 }
 
 /**
- * batadv_has_timed_out - compares current time (jiffies) and timestamp +
+ * batadv_has_timed_out() - compares current time (jiffies) and timestamp +
  * timeout
  * @timestamp: base value to compare with (in jiffies)
  * @timeout: added to base value before comparing (in milliseconds)
@@ -265,40 +296,96 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
 	return time_is_before_jiffies(timestamp + msecs_to_jiffies(timeout));
 }
 
+/**
+ * batadv_atomic_dec_not_zero() - Decrease unless the number is 0
+ * @v: pointer of type atomic_t
+ *
+ * Return: non-zero if v was not 0, and zero otherwise.
+ */
 #define batadv_atomic_dec_not_zero(v)	atomic_add_unless((v), -1, 0)
 
-/* Returns the smallest signed integer in two's complement with the sizeof x */
+/**
+ * batadv_smallest_signed_int() - Returns the smallest signed integer in two's
+ * complement with the sizeof x
+ * @x: type of integer
+ *
+ * Return: smallest signed integer of type
+ */
 #define batadv_smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u)))
 
-/* Checks if a sequence number x is a predecessor/successor of y.
- * they handle overflows/underflows and can correctly check for a
- * predecessor/successor unless the variable sequence number has grown by
- * more then 2**(bitwidth(x)-1)-1.
+/**
+ * batadv_seq_before() - Checks if a sequence number x is a predecessor of y
+ * @x: potential predecessor of @y
+ * @y: value to compare @x against
+ *
+ * It handles overflows/underflows and can correctly check for a predecessor
+ * unless the variable sequence number has grown by more than
+ * 2**(bitwidth(x)-1)-1.
+ *
  * This means that for a u8 with the maximum value 255, it would think:
- * - when adding nothing - it is neither a predecessor nor a successor
- * - before adding more than 127 to the starting value - it is a predecessor,
- * - when adding 128 - it is neither a predecessor nor a successor,
- * - after adding more than 127 to the starting value - it is a successor
+ *
+ * * when adding nothing - it is neither a predecessor nor a successor
+ * * before adding more than 127 to the starting value - it is a predecessor,
+ * * when adding 128 - it is neither a predecessor nor a successor,
+ * * after adding more than 127 to the starting value - it is a successor
+ *
+ * Return: true when x is a predecessor of y, false otherwise
  */
 #define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
 				  typeof(y)_d2 = (y); \
 				  typeof(x)_dummy = (_d1 - _d2); \
 				  (void)(&_d1 == &_d2); \
 				  _dummy > batadv_smallest_signed_int(_dummy); })
+
+/**
+ * batadv_seq_after() - Checks if a sequence number x is a successor of y
+ * @x: potential successor of @y
+ * @y: value to compare @x against
+ *
+ * It handles overflows/underflows and can correctly check for a successor
+ * unless the variable sequence number has grown by more than
+ * 2**(bitwidth(x)-1)-1.
+ *
+ * This means that for a u8 with the maximum value 255, it would think:
+ *
+ * * when adding nothing - it is neither a predecessor nor a successor
+ * * before adding more than 127 to the starting value - it is a predecessor,
+ * * when adding 128 - it is neither a predecessor nor a successor,
+ * * after adding more than 127 to the starting value - it is a successor
+ *
+ * Return: true when x is a successor of y, false otherwise
+ */
 #define batadv_seq_after(x, y) batadv_seq_before(y, x)
 
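The trick in batadv_seq_before() is that the unsigned subtraction wraps and the "sign bit" of the difference classifies the two numbers. A standalone userspace demonstration of the same arithmetic for u8 sequence numbers (not kernel code):

#include <stdint.h>
#include <stdio.h>

/* mirrors batadv_seq_before() for a u8: x precedes y iff the wrapped
 * difference is strictly greater than the smallest signed int (128)
 */
static int seq_before_u8(uint8_t x, uint8_t y)
{
	uint8_t diff = x - y;	/* wraps modulo 256 */

	return diff > 0x80;
}

int main(void)
{
	printf("%d\n", seq_before_u8(250, 3));	/* 1: 3 wrapped past 250 */
	printf("%d\n", seq_before_u8(3, 250));	/* 0: 3 is the successor */
	printf("%d\n", seq_before_u8(131, 3));	/* 0: distance is exactly 128 */
	return 0;
}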
-/* Stop preemption on local cpu while incrementing the counter */
+/**
+ * batadv_add_counter() - Add to per cpu statistics counter of soft interface
+ * @bat_priv: the bat priv with all the soft interface information
+ * @idx: counter index which should be modified
+ * @count: value to increase counter by
+ *
+ * Stop preemption on local cpu while incrementing the counter
+ */
 static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
 				      size_t count)
 {
 	this_cpu_add(bat_priv->bat_counters[idx], count);
 }
 
+/**
+ * batadv_inc_counter() - Increase per cpu statistics counter of soft interface
+ * @b: the bat priv with all the soft interface information
+ * @i: counter index which should be modified
+ */
 #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
 
-/* Define a macro to reach the control buffer of the skb. The members of the
- * control buffer are defined in struct batadv_skb_cb in types.h.
- * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+/**
+ * BATADV_SKB_CB() - Get batadv_skb_cb from skb control buffer
+ * @__skb: skb holding the control buffer
+ *
+ * The members of the control buffer are defined in struct batadv_skb_cb in
+ * types.h. The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+ *
+ * Return: pointer to the batadv_skb_cb of the skb
  */
 #define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
 
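Both helpers above show up throughout the hot paths: the per-cpu counters avoid lock and cache-line contention on statistics, and BATADV_SKB_CB() reuses the skb->cb scratch area instead of allocating per-packet state. A sketch of typical use, assuming a valid bat_priv and skb; the counter indices and the decoded flag are taken from this tree's types.h:

static void example_count_and_mark(struct batadv_priv *bat_priv,
				   struct sk_buff *skb)
{
	/* bump the per-cpu TX counters; no locking required */
	batadv_inc_counter(bat_priv, BATADV_CNT_TX);
	batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, skb->len);

	/* stash per-packet state in the skb control buffer */
	BATADV_SKB_CB(skb)->decoded = true;
}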
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index e553a8770a89..cbdeb47ec3f6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
@@ -24,7 +25,7 @@
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/icmpv6.h>
 #include <linux/if_bridge.h>
 #include <linux/if_ether.h>
@@ -54,18 +55,18 @@
 #include <net/if_inet6.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <uapi/linux/batadv_packet.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
-#include "packet.h"
 #include "translation-table.h"
 #include "tvlv.h"
 
 static void batadv_mcast_mla_update(struct work_struct *work);
 
 /**
- * batadv_mcast_start_timer - schedule the multicast periodic worker
+ * batadv_mcast_start_timer() - schedule the multicast periodic worker
  * @bat_priv: the bat priv with all the soft interface information
  */
 static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
@@ -75,7 +76,7 @@ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
+ * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists
  * @soft_iface: netdev struct of the mesh interface
  *
  * If the given soft interface has a bridge on top then the refcount
@@ -101,7 +102,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
 }
 
 /**
- * batadv_mcast_mla_softif_get - get softif multicast listeners
+ * batadv_mcast_mla_softif_get() - get softif multicast listeners
  * @dev: the device to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -147,7 +148,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
 }
 
 /**
- * batadv_mcast_mla_is_duplicate - check whether an address is in a list
+ * batadv_mcast_mla_is_duplicate() - check whether an address is in a list
  * @mcast_addr: the multicast address to check
  * @mcast_list: the list with multicast addresses to search in
  *
@@ -167,7 +168,7 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
 }
 
 /**
- * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address
+ * batadv_mcast_mla_br_addr_cpy() - copy a bridge multicast address
  * @dst: destination to write to - a multicast MAC address
  * @src: source to read from - a multicast IP address
  *
@@ -191,7 +192,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
 }
 
 /**
- * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners
+ * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
  * @dev: a bridge slave whose bridge to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -244,7 +245,7 @@ out:
 }
 
 /**
- * batadv_mcast_mla_list_free - free a list of multicast addresses
+ * batadv_mcast_mla_list_free() - free a list of multicast addresses
  * @mcast_list: the list to free
  *
  * Removes and frees all items in the given mcast_list.
@@ -261,7 +262,7 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
 }
 
 /**
- * batadv_mcast_mla_tt_retract - clean up multicast listener announcements
+ * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements
  * @bat_priv: the bat priv with all the soft interface information
  * @mcast_list: a list of addresses which should _not_ be removed
  *
@@ -297,7 +298,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_mla_tt_add - add multicast listener announcements
+ * batadv_mcast_mla_tt_add() - add multicast listener announcements
  * @bat_priv: the bat priv with all the soft interface information
  * @mcast_list: a list of addresses which are going to get added
  *
@@ -333,7 +334,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_has_bridge - check whether the soft-iface is bridged
+ * batadv_mcast_has_bridge() - check whether the soft-iface is bridged
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Checks whether there is a bridge on top of our soft interface.
@@ -354,7 +355,8 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_mcast_querier_log - debug output regarding the querier status on link
+ * batadv_mcast_querier_log() - debug output regarding the querier status on
+ * link
  * @bat_priv: the bat priv with all the soft interface information
  * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD")
  * @old_state: the previous querier state on our link
@@ -405,7 +407,8 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
 }
 
 /**
- * batadv_mcast_bridge_log - debug output for topology changes in bridged setups
+ * batadv_mcast_bridge_log() - debug output for topology changes in bridged
+ * setups
  * @bat_priv: the bat priv with all the soft interface information
  * @bridged: a flag about whether the soft interface is currently bridged or not
  * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier
@@ -444,7 +447,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged,
 }
 
 /**
- * batadv_mcast_flags_logs - output debug information about mcast flag changes
+ * batadv_mcast_flags_log() - output debug information about mcast flag changes
  * @bat_priv: the bat priv with all the soft interface information
  * @flags: flags indicating the new multicast state
  *
@@ -470,7 +473,7 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
 }
 
 /**
- * batadv_mcast_mla_tvlv_update - update multicast tvlv
+ * batadv_mcast_mla_tvlv_update() - update multicast tvlv
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Updates the own multicast tvlv with our current multicast related settings,
@@ -545,7 +548,7 @@ update:
 }
 
 /**
- * __batadv_mcast_mla_update - update the own MLAs
+ * __batadv_mcast_mla_update() - update the own MLAs
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Updates the own multicast listener announcements in the translation
@@ -582,7 +585,7 @@ out:
 }
 
 /**
- * batadv_mcast_mla_update - update the own MLAs
+ * batadv_mcast_mla_update() - update the own MLAs
  * @work: kernel work struct
  *
  * Updates the own multicast listener announcements in the translation
@@ -605,7 +608,7 @@ static void batadv_mcast_mla_update(struct work_struct *work)
 }
 
 /**
- * batadv_mcast_is_report_ipv4 - check for IGMP reports
+ * batadv_mcast_is_report_ipv4() - check for IGMP reports
  * @skb: the ethernet frame destined for the mesh
  *
  * This call might reallocate skb data.
@@ -630,7 +633,8 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
 }
 
 /**
- * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential
+ * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding
+ * potential
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the IPv4 packet to check
  * @is_unsnoopable: stores whether the destination is snoopable
@@ -671,7 +675,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_is_report_ipv6 - check for MLD reports
+ * batadv_mcast_is_report_ipv6() - check for MLD reports
  * @skb: the ethernet frame destined for the mesh
  *
  * This call might reallocate skb data.
@@ -695,7 +699,8 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
 }
 
 /**
- * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential
+ * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding
+ * potential
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the IPv6 packet to check
  * @is_unsnoopable: stores whether the destination is snoopable
@@ -736,7 +741,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_forw_mode_check - check for optimized forwarding potential
+ * batadv_mcast_forw_mode_check() - check for optimized forwarding potential
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the multicast frame to check
  * @is_unsnoopable: stores whether the destination is snoopable
@@ -774,7 +779,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast
+ * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast
  * interest
  * @bat_priv: the bat priv with all the soft interface information
  * @ethhdr: ethernet header of a packet
@@ -798,7 +803,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_forw_tt_node_get - get a multicast tt node
+ * batadv_mcast_forw_tt_node_get() - get a multicast tt node
  * @bat_priv: the bat priv with all the soft interface information
  * @ethhdr: the ether header containing the multicast destination
  *
@@ -814,7 +819,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag
+ * batadv_mcast_forw_ipv4_node_get() - get a node with an ipv4 flag
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
@@ -841,7 +846,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag
+ * batadv_mcast_forw_ipv6_node_get() - get a node with an ipv6 flag
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
@@ -868,7 +873,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag
+ * batadv_mcast_forw_ip_node_get() - get a node with an ipv4/ipv6 flag
  * @bat_priv: the bat priv with all the soft interface information
  * @ethhdr: an ethernet header to determine the protocol family from
  *
@@ -892,7 +897,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag
+ * batadv_mcast_forw_unsnoop_node_get() - get a node with an unsnoopable flag
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
@@ -919,7 +924,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_mcast_forw_mode - check on how to forward a multicast packet
+ * batadv_mcast_forw_mode() - check on how to forward a multicast packet
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: The multicast packet to check
  * @orig: an originator to be set to forward the skb to
@@ -973,7 +978,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 }
 
 /**
- * batadv_mcast_want_unsnoop_update - update unsnoop counter and list
+ * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node which multicast state might have changed of
  * @mcast_flags: flags indicating the new multicast state
@@ -1018,7 +1023,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_want_ipv4_update - update want-all-ipv4 counter and list
+ * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node which multicast state might have changed of
  * @mcast_flags: flags indicating the new multicast state
@@ -1063,7 +1068,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_want_ipv6_update - update want-all-ipv6 counter and list
+ * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node which multicast state might have changed of
  * @mcast_flags: flags indicating the new multicast state
@@ -1108,7 +1113,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container
+ * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container
  * @bat_priv: the bat priv with all the soft interface information
 * @orig: the orig_node of the ogm
 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -1164,7 +1169,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_init - initialize the multicast optimizations structures
+ * batadv_mcast_init() - initialize the multicast optimizations structures
  * @bat_priv: the bat priv with all the soft interface information
  */
 void batadv_mcast_init(struct batadv_priv *bat_priv)
@@ -1179,7 +1184,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_mcast_flags_print_header - print own mcast flags to debugfs table
+ * batadv_mcast_flags_print_header() - print own mcast flags to debugfs table
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: debugfs table seq_file struct
  *
@@ -1220,7 +1225,7 @@ static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes
+ * batadv_mcast_flags_seq_print_text() - print the mcast flags of other nodes
  * @seq: seq file to print on
  * @offset: not used
  *
@@ -1281,7 +1286,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
 #endif
 
 /**
- * batadv_mcast_free - free the multicast optimizations structures
+ * batadv_mcast_free() - free the multicast optimizations structures
  * @bat_priv: the bat priv with all the soft interface information
  */
 void batadv_mcast_free(struct batadv_priv *bat_priv)
@@ -1296,7 +1301,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_mcast_purge_orig - reset originator global mcast state modifications
+ * batadv_mcast_purge_orig() - reset originator global mcast state modifications
  * @orig: the originator which is going to get purged
  */
 void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 2a78cddab0e9..3ac06337ab71 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
@@ -25,15 +26,21 @@ struct sk_buff;
 
 /**
  * enum batadv_forw_mode - the way a packet should be forwarded as
- * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
- * flooding)
- * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the
- * BATMAN unicast routing protocol)
- * @BATADV_FORW_NONE: don't forward, drop it
  */
 enum batadv_forw_mode {
+	/**
+	 * @BATADV_FORW_ALL: forward the packet to all nodes (currently via
+	 * classic flooding)
+	 */
 	BATADV_FORW_ALL,
+
+	/**
+	 * @BATADV_FORW_SINGLE: forward the packet to a single node (currently
+	 * via the BATMAN unicast routing protocol)
+	 */
 	BATADV_FORW_SINGLE,
+
+	/** @BATADV_FORW_NONE: don't forward, drop it */
 	BATADV_FORW_NONE,
 };
 
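batadv_mcast_forw_mode() (documented in the multicast.c hunks above) returns one of these values and, for BATADV_FORW_SINGLE, fills in the target originator. A sketch of how a transmit path could branch on the result; the example_send_* helpers are hypothetical, only the enum values and batadv_mcast_forw_mode() come from this code:

	struct batadv_orig_node *orig = NULL;

	switch (batadv_mcast_forw_mode(bat_priv, skb, &orig)) {
	case BATADV_FORW_ALL:
		/* no full multicast optimization possible: classic flood */
		example_send_bcast(bat_priv, skb);
		break;
	case BATADV_FORW_SINGLE:
		/* exactly one interested node: unicast towards it */
		example_send_unicast(bat_priv, skb, orig);
		break;
	case BATADV_FORW_NONE:
	default:
		kfree_skb(skb);	/* nobody asked for this frame */
		break;
	}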
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ab13b4d58733..a823d3899bad 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
@@ -23,8 +24,8 @@
 #include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/fs.h>
 #include <linux/genetlink.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -39,6 +40,7 @@
 #include <net/genetlink.h>
 #include <net/netlink.h>
 #include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
@@ -46,7 +48,6 @@
 #include "gateway_client.h"
 #include "hard-interface.h"
 #include "originator.h"
-#include "packet.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
 #include "translation-table.h"
@@ -99,7 +100,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 };
 
 /**
- * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * batadv_netlink_get_ifindex() - Extract an interface index from a message
  * @nlh: Message header
  * @attrtype: Attribute which holds an interface index
  *
@@ -114,7 +115,7 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
 }
 
 /**
- * batadv_netlink_mesh_info_put - fill in generic information about mesh
+ * batadv_netlink_mesh_info_put() - fill in generic information about mesh
  * interface
  * @msg: netlink message to be sent back
  * @soft_iface: interface for which the data should be taken
@@ -169,7 +170,7 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
 }
 
 /**
- * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO
+ * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
  * netlink request
  * @skb: received netlink message
  * @info: receiver information
@@ -230,7 +231,7 @@ batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
 }
 
 /**
- * batadv_netlink_tp_meter_put - Fill information of started tp_meter session
+ * batadv_netlink_tp_meter_put() - Fill information of started tp_meter session
  * @msg: netlink message to be sent back
  * @cookie: tp meter session cookie
 *
@@ -246,7 +247,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
 }
 
 /**
- * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client
+ * batadv_netlink_tpmeter_notify() - send tp_meter result via netlink to client
  * @bat_priv: the bat priv with all the soft interface information
  * @dst: destination of tp_meter session
  * @result: reason for tp meter session stop
@@ -309,7 +310,7 @@ err_genlmsg:
 }
 
 /**
- * batadv_netlink_tp_meter_start - Start a new tp_meter session
+ * batadv_netlink_tp_meter_start() - Start a new tp_meter session
  * @skb: received netlink message
  * @info: receiver information
  *
@@ -386,7 +387,7 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 }
 
 /**
- * batadv_netlink_tp_meter_start - Cancel a running tp_meter session
+ * batadv_netlink_tp_meter_cancel() - Cancel a running tp_meter session
  * @skb: received netlink message
  * @info: receiver information
  *
@@ -431,7 +432,7 @@ out:
 }
 
 /**
- * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -473,7 +474,7 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_netlink_dump_hardifs - Dump all hard interface into a messages
+ * batadv_netlink_dump_hardifs() - Dump all hard interfaces into messages
  * @msg: Netlink message to dump into
  * @cb: Parameters from query
  *
@@ -620,7 +621,7 @@ struct genl_family batadv_netlink_family __ro_after_init = {
 };
 
 /**
- * batadv_netlink_register - register batadv genl netlink family
+ * batadv_netlink_register() - register batadv genl netlink family
  */
 void __init batadv_netlink_register(void)
 {
@@ -632,7 +633,7 @@ void __init batadv_netlink_register(void)
 }
 
 /**
- * batadv_netlink_unregister - unregister batadv genl netlink family
+ * batadv_netlink_unregister() - unregister batadv genl netlink family
  */
 void batadv_netlink_unregister(void)
 {
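From userspace, the family registered here is reached by resolving its generic netlink name and issuing requests such as a hard-interface dump. A rough libnl-3 sketch, not part of this patch, assuming BATADV_NL_NAME, BATADV_CMD_GET_HARDIFS and BATADV_ATTR_MESH_IFINDEX from uapi/linux/batman_adv.h and a mesh interface called bat0 (error handling elided):

#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/genl/ctrl.h>
#include <netlink/genl/genl.h>
#include <linux/batman_adv.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, BATADV_NL_NAME);	/* "batadv" */

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
		    NLM_F_DUMP, BATADV_CMD_GET_HARDIFS, 1);
	nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, if_nametoindex("bat0"));

	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);	/* one reply per hard interface */

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}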
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index f1cd8c5da966..0e7e57b69b54 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 3604d7899e2c..b48116bb24ef 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
@@ -25,7 +26,7 @@
 #include <linux/debugfs.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <linux/init.h>
@@ -35,6 +36,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
+#include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/random.h>
@@ -47,12 +49,12 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
 #include "originator.h"
-#include "packet.h"
 #include "routing.h"
 #include "send.h"
 #include "tvlv.h"
@@ -65,7 +67,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
65 struct batadv_hard_iface *recv_if); 67 struct batadv_hard_iface *recv_if);
66 68
67/** 69/**
68 * batadv_nc_init - one-time initialization for network coding 70 * batadv_nc_init() - one-time initialization for network coding
69 * 71 *
70 * Return: 0 on success or negative error number in case of failure 72 * Return: 0 on success or negative error number in case of failure
71 */ 73 */
@@ -81,7 +83,7 @@ int __init batadv_nc_init(void)
81} 83}
82 84
83/** 85/**
84 * batadv_nc_start_timer - initialise the nc periodic worker 86 * batadv_nc_start_timer() - initialise the nc periodic worker
85 * @bat_priv: the bat priv with all the soft interface information 87 * @bat_priv: the bat priv with all the soft interface information
86 */ 88 */
87static void batadv_nc_start_timer(struct batadv_priv *bat_priv) 89static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
@@ -91,7 +93,7 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
91} 93}
92 94
93/** 95/**
94 * batadv_nc_tvlv_container_update - update the network coding tvlv container 96 * batadv_nc_tvlv_container_update() - update the network coding tvlv container
95 * after network coding setting change 97 * after network coding setting change
96 * @bat_priv: the bat priv with all the soft interface information 98 * @bat_priv: the bat priv with all the soft interface information
97 */ 99 */
@@ -113,7 +115,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv)
113} 115}
114 116
115/** 117/**
116 * batadv_nc_status_update - update the network coding tvlv container after 118 * batadv_nc_status_update() - update the network coding tvlv container after
117 * network coding setting change 119 * network coding setting change
118 * @net_dev: the soft interface net device 120 * @net_dev: the soft interface net device
119 */ 121 */
@@ -125,7 +127,7 @@ void batadv_nc_status_update(struct net_device *net_dev)
125} 127}
126 128
127/** 129/**
128 * batadv_nc_tvlv_ogm_handler_v1 - process incoming nc tvlv container 130 * batadv_nc_tvlv_ogm_handler_v1() - process incoming nc tvlv container
129 * @bat_priv: the bat priv with all the soft interface information 131 * @bat_priv: the bat priv with all the soft interface information
130 * @orig: the orig_node of the ogm 132 * @orig: the orig_node of the ogm
131 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) 133 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -144,7 +146,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
144} 146}
145 147
146/** 148/**
147 * batadv_nc_mesh_init - initialise coding hash table and start housekeeping 149 * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping
148 * @bat_priv: the bat priv with all the soft interface information 150 * @bat_priv: the bat priv with all the soft interface information
149 * 151 *
150 * Return: 0 on success or negative error number in case of failure 152 * Return: 0 on success or negative error number in case of failure
@@ -185,7 +187,7 @@ err:
185} 187}
186 188
187/** 189/**
188 * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables 190 * batadv_nc_init_bat_priv() - initialise the nc specific bat_priv variables
189 * @bat_priv: the bat priv with all the soft interface information 191 * @bat_priv: the bat priv with all the soft interface information
190 */ 192 */
191void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) 193void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
@@ -197,7 +199,7 @@ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
197} 199}
198 200
199/** 201/**
200 * batadv_nc_init_orig - initialise the nc fields of an orig_node 202 * batadv_nc_init_orig() - initialise the nc fields of an orig_node
201 * @orig_node: the orig_node which is going to be initialised 203 * @orig_node: the orig_node which is going to be initialised
202 */ 204 */
203void batadv_nc_init_orig(struct batadv_orig_node *orig_node) 205void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
@@ -209,8 +211,8 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
209} 211}
210 212
211/** 213/**
212 * batadv_nc_node_release - release nc_node from lists and queue for free after 214 * batadv_nc_node_release() - release nc_node from lists and queue for free
213 * rcu grace period 215 * after rcu grace period
214 * @ref: kref pointer of the nc_node 216 * @ref: kref pointer of the nc_node
215 */ 217 */
216static void batadv_nc_node_release(struct kref *ref) 218static void batadv_nc_node_release(struct kref *ref)
@@ -224,7 +226,7 @@ static void batadv_nc_node_release(struct kref *ref)
224} 226}
225 227
226/** 228/**
227 * batadv_nc_node_put - decrement the nc_node refcounter and possibly 229 * batadv_nc_node_put() - decrement the nc_node refcounter and possibly
228 * release it 230 * release it
229 * @nc_node: nc_node to be free'd 231 * @nc_node: nc_node to be free'd
230 */ 232 */
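
Both put helpers documented in these hunks follow the usual kref pattern; the bodies sit outside the diff context, so the following is only a sketch with names taken from the kernel-doc above:

    /* Sketch only: drop one reference; the release function documented
     * above frees the object after the RCU grace period once the
     * refcount reaches zero.
     */
    static void nc_node_put_sketch(struct batadv_nc_node *nc_node)
    {
            kref_put(&nc_node->refcount, batadv_nc_node_release);
    }
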
@@ -234,8 +236,8 @@ static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
234} 236}
235 237
236/** 238/**
237 * batadv_nc_path_release - release nc_path from lists and queue for free after 239 * batadv_nc_path_release() - release nc_path from lists and queue for free
238 * rcu grace period 240 * after rcu grace period
239 * @ref: kref pointer of the nc_path 241 * @ref: kref pointer of the nc_path
240 */ 242 */
241static void batadv_nc_path_release(struct kref *ref) 243static void batadv_nc_path_release(struct kref *ref)
@@ -248,7 +250,7 @@ static void batadv_nc_path_release(struct kref *ref)
248} 250}
249 251
250/** 252/**
251 * batadv_nc_path_put - decrement the nc_path refcounter and possibly 253 * batadv_nc_path_put() - decrement the nc_path refcounter and possibly
252 * release it 254 * release it
253 * @nc_path: nc_path to be free'd 255 * @nc_path: nc_path to be free'd
254 */ 256 */
@@ -258,7 +260,7 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
258} 260}
259 261
260/** 262/**
261 * batadv_nc_packet_free - frees nc packet 263 * batadv_nc_packet_free() - frees nc packet
262 * @nc_packet: the nc packet to free 264 * @nc_packet: the nc packet to free
263 * @dropped: whether the packet is freed because it is dropped 265 * @dropped: whether the packet is freed because it is dropped
264 */ 266 */
@@ -275,7 +277,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
275} 277}
276 278
277/** 279/**
278 * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged 280 * batadv_nc_to_purge_nc_node() - checks whether an nc node has to be purged
279 * @bat_priv: the bat priv with all the soft interface information 281 * @bat_priv: the bat priv with all the soft interface information
280 * @nc_node: the nc node to check 282 * @nc_node: the nc node to check
281 * 283 *
@@ -291,7 +293,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
291} 293}
292 294
293/** 295/**
294 * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out 296 * batadv_nc_to_purge_nc_path_coding() - checks whether an nc path has timed out
295 * @bat_priv: the bat priv with all the soft interface information 297 * @bat_priv: the bat priv with all the soft interface information
296 * @nc_path: the nc path to check 298 * @nc_path: the nc path to check
297 * 299 *
@@ -311,7 +313,8 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
311} 313}
312 314
313/** 315/**
314 * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out 316 * batadv_nc_to_purge_nc_path_decoding() - checks whether an nc path has timed
317 * out
315 * @bat_priv: the bat priv with all the soft interface information 318 * @bat_priv: the bat priv with all the soft interface information
316 * @nc_path: the nc path to check 319 * @nc_path: the nc path to check
317 * 320 *
@@ -331,7 +334,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
331} 334}
332 335
333/** 336/**
334 * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale 337 * batadv_nc_purge_orig_nc_nodes() - go through list of nc nodes and purge stale
335 * entries 338 * entries
336 * @bat_priv: the bat priv with all the soft interface information 339 * @bat_priv: the bat priv with all the soft interface information
337 * @list: list of nc nodes 340 * @list: list of nc nodes
@@ -369,7 +372,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
369} 372}
370 373
371/** 374/**
372 * batadv_nc_purge_orig - purges all nc node data attached to the given 375 * batadv_nc_purge_orig() - purges all nc node data attached to the given
373 * originator 376 * originator
374 * @bat_priv: the bat priv with all the soft interface information 377 * @bat_priv: the bat priv with all the soft interface information
375 * @orig_node: orig_node with the nc node entries to be purged 378 * @orig_node: orig_node with the nc node entries to be purged
@@ -395,8 +398,8 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
395} 398}
396 399
397/** 400/**
398 * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they 401 * batadv_nc_purge_orig_hash() - traverse entire originator hash to check if
399 * have timed out nc nodes 402 * they have timed out nc nodes
400 * @bat_priv: the bat priv with all the soft interface information 403 * @bat_priv: the bat priv with all the soft interface information
401 */ 404 */
402static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) 405static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
@@ -422,7 +425,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
422} 425}
423 426
424/** 427/**
425 * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove 428 * batadv_nc_purge_paths() - traverse all nc paths part of the hash and remove
426 * unused ones 429 * unused ones
427 * @bat_priv: the bat priv with all the soft interface information 430 * @bat_priv: the bat priv with all the soft interface information
428 * @hash: hash table containing the nc paths to check 431 * @hash: hash table containing the nc paths to check
@@ -481,7 +484,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
481} 484}
482 485
483/** 486/**
484 * batadv_nc_hash_key_gen - computes the nc_path hash key 487 * batadv_nc_hash_key_gen() - computes the nc_path hash key
485 * @key: buffer to hold the final hash key 488 * @key: buffer to hold the final hash key
486 * @src: source ethernet mac address going into the hash key 489 * @src: source ethernet mac address going into the hash key
487 * @dst: destination ethernet mac address going into the hash key 490 * @dst: destination ethernet mac address going into the hash key
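
Only the kernel-doc changes in this hunk; as a sketch of what the key generation does (the prev_hop/next_hop field names are an assumption, they are not shown in this diff), both MAC addresses are copied into an nc_path object that serves as the search key:

    /* Sketch only: build the nc_path search key from the source and
     * destination MAC addresses described in the kernel-doc above.
     */
    static void nc_hash_key_gen_sketch(struct batadv_nc_path *key,
                                       const char *src, const char *dst)
    {
            memcpy(key->prev_hop, src, sizeof(key->prev_hop)); /* assumed field */
            memcpy(key->next_hop, dst, sizeof(key->next_hop)); /* assumed field */
    }
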
@@ -494,7 +497,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
494} 497}
495 498
496/** 499/**
497 * batadv_nc_hash_choose - compute the hash value for an nc path 500 * batadv_nc_hash_choose() - compute the hash value for an nc path
498 * @data: data to hash 501 * @data: data to hash
499 * @size: size of the hash table 502 * @size: size of the hash table
500 * 503 *
@@ -512,7 +515,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
512} 515}
513 516
514/** 517/**
515 * batadv_nc_hash_compare - comparing function used in the network coding hash 518 * batadv_nc_hash_compare() - comparing function used in the network coding hash
516 * tables 519 * tables
517 * @node: node in the local table 520 * @node: node in the local table
518 * @data2: second object to compare the node to 521 * @data2: second object to compare the node to
@@ -538,7 +541,7 @@ static bool batadv_nc_hash_compare(const struct hlist_node *node,
538} 541}
539 542
540/** 543/**
541 * batadv_nc_hash_find - search for an existing nc path and return it 544 * batadv_nc_hash_find() - search for an existing nc path and return it
542 * @hash: hash table containing the nc path 545 * @hash: hash table containing the nc path
543 * @data: search key 546 * @data: search key
544 * 547 *
@@ -575,7 +578,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
575} 578}
576 579
577/** 580/**
578 * batadv_nc_send_packet - send non-coded packet and free nc_packet struct 581 * batadv_nc_send_packet() - send non-coded packet and free nc_packet struct
579 * @nc_packet: the nc packet to send 582 * @nc_packet: the nc packet to send
580 */ 583 */
581static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) 584static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
@@ -586,7 +589,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
586} 589}
587 590
588/** 591/**
589 * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet. 592 * batadv_nc_sniffed_purge() - Checks timestamp of given sniffed nc_packet.
590 * @bat_priv: the bat priv with all the soft interface information 593 * @bat_priv: the bat priv with all the soft interface information
591 * @nc_path: the nc path the packet belongs to 594 * @nc_path: the nc path the packet belongs to
592 * @nc_packet: the nc packet to be checked 595 * @nc_packet: the nc packet to be checked
@@ -625,7 +628,7 @@ out:
625} 628}
626 629
627/** 630/**
628 * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet. 631 * batadv_nc_fwd_flush() - Checks the timestamp of the given nc packet.
629 * @bat_priv: the bat priv with all the soft interface information 632 * @bat_priv: the bat priv with all the soft interface information
630 * @nc_path: the nc path the packet belongs to 633 * @nc_path: the nc path the packet belongs to
631 * @nc_packet: the nc packet to be checked 634 * @nc_packet: the nc packet to be checked
@@ -663,8 +666,8 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
663} 666}
664 667
665/** 668/**
666 * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out 669 * batadv_nc_process_nc_paths() - traverse given nc packet pool and free timed
667 * nc packets 670 * out nc packets
668 * @bat_priv: the bat priv with all the soft interface information 671 * @bat_priv: the bat priv with all the soft interface information
669 * @hash: to be processed hash table 672 * @hash: to be processed hash table
670 * @process_fn: Function called to process given nc packet. Should return true 673 * @process_fn: Function called to process given nc packet. Should return true
@@ -709,7 +712,8 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
709} 712}
710 713
711/** 714/**
712 * batadv_nc_worker - periodic task for housekeeping related to network coding 715 * batadv_nc_worker() - periodic task for housekeeping related to network
716 * coding
713 * @work: kernel work struct 717 * @work: kernel work struct
714 */ 718 */
715static void batadv_nc_worker(struct work_struct *work) 719static void batadv_nc_worker(struct work_struct *work)
@@ -749,8 +753,8 @@ static void batadv_nc_worker(struct work_struct *work)
749} 753}
750 754
751/** 755/**
752 * batadv_can_nc_with_orig - checks whether the given orig node is suitable for 756 * batadv_can_nc_with_orig() - checks whether the given orig node is suitable
753 * coding or not 757 * for coding or not
754 * @bat_priv: the bat priv with all the soft interface information 758 * @bat_priv: the bat priv with all the soft interface information
755 * @orig_node: neighboring orig node which may be used as nc candidate 759 * @orig_node: neighboring orig node which may be used as nc candidate
756 * @ogm_packet: incoming ogm packet also used for the checks 760 * @ogm_packet: incoming ogm packet also used for the checks
@@ -790,7 +794,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
790} 794}
791 795
792/** 796/**
793 * batadv_nc_find_nc_node - search for an existing nc node and return it 797 * batadv_nc_find_nc_node() - search for an existing nc node and return it
794 * @orig_node: orig node originating the ogm packet 798 * @orig_node: orig node originating the ogm packet
795 * @orig_neigh_node: neighboring orig node from which we received the ogm packet 799 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
796 * (can be equal to orig_node) 800 * (can be equal to orig_node)
@@ -830,7 +834,7 @@ batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
830} 834}
831 835
832/** 836/**
833 * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was 837 * batadv_nc_get_nc_node() - retrieves an nc node or creates the entry if it was
834 * not found 838 * not found
835 * @bat_priv: the bat priv with all the soft interface information 839 * @bat_priv: the bat priv with all the soft interface information
836 * @orig_node: orig node originating the ogm packet 840 * @orig_node: orig node originating the ogm packet
@@ -890,7 +894,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
890} 894}
891 895
892/** 896/**
893 * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node 897 * batadv_nc_update_nc_node() - updates stored incoming and outgoing nc node
894 * structs (best called on incoming OGMs) 898 * structs (best called on incoming OGMs)
895 * @bat_priv: the bat priv with all the soft interface information 899 * @bat_priv: the bat priv with all the soft interface information
896 * @orig_node: orig node originating the ogm packet 900 * @orig_node: orig node originating the ogm packet
@@ -945,7 +949,7 @@ out:
945} 949}
946 950
947/** 951/**
948 * batadv_nc_get_path - get existing nc_path or allocate a new one 952 * batadv_nc_get_path() - get existing nc_path or allocate a new one
949 * @bat_priv: the bat priv with all the soft interface information 953 * @bat_priv: the bat priv with all the soft interface information
950 * @hash: hash table containing the nc path 954 * @hash: hash table containing the nc path
951 * @src: ethernet source address - first half of the nc path search key 955 * @src: ethernet source address - first half of the nc path search key
@@ -1006,7 +1010,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
1006} 1010}
1007 1011
1008/** 1012/**
1009 * batadv_nc_random_weight_tq - scale the receiver's TQ-value to avoid unfair 1013 * batadv_nc_random_weight_tq() - scale the receiver's TQ-value to avoid unfair
1010 * selection of a receiver with slightly lower TQ than the other 1014 * selection of a receiver with slightly lower TQ than the other
1011 * @tq: to be weighted tq value 1015 * @tq: to be weighted tq value
1012 * 1016 *
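
The weighting body is outside this hunk; a plausible sketch of the idea described above, assuming BATADV_TQ_MAX_VALUE (255) as the TQ ceiling and a simple scaled random loss:

    /* Sketch only: randomize the estimated packet loss
     * (BATADV_TQ_MAX_VALUE - tq) and convert back to a TQ value, so a
     * receiver with a slightly lower TQ still has a fair chance.
     */
    static u8 nc_random_weight_tq_sketch(u8 tq)
    {
            u8 rand_val;

            get_random_bytes(&rand_val, sizeof(rand_val));

            /* scale the random loss into [0, BATADV_TQ_MAX_VALUE - tq] */
            rand_val = rand_val * (BATADV_TQ_MAX_VALUE - tq) / BATADV_TQ_MAX_VALUE;

            return BATADV_TQ_MAX_VALUE - rand_val;
    }
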
@@ -1029,7 +1033,7 @@ static u8 batadv_nc_random_weight_tq(u8 tq)
1029} 1033}
1030 1034
1031/** 1035/**
1032 * batadv_nc_memxor - XOR destination with source 1036 * batadv_nc_memxor() - XOR destination with source
1033 * @dst: byte array to XOR into 1037 * @dst: byte array to XOR into
1034 * @src: byte array to XOR from 1038 * @src: byte array to XOR from
1035 * @len: length of destination array 1039 * @len: length of destination array
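
The XOR itself is not part of the hunk; the operation the kernel-doc describes reduces to a byte-wise loop (sketch, not the patched body):

    /* Sketch only: XOR @len bytes of @src into @dst in place. */
    static void nc_memxor_sketch(char *dst, const char *src,
                                 unsigned int len)
    {
            unsigned int i;

            for (i = 0; i < len; i++)
                    dst[i] ^= src[i];
    }
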
@@ -1043,7 +1047,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
1043} 1047}
1044 1048
1045/** 1049/**
1046 * batadv_nc_code_packets - code a received unicast_packet with an nc packet 1050 * batadv_nc_code_packets() - code a received unicast_packet with an nc packet
1047 * into a coded_packet and send it 1051 * into a coded_packet and send it
1048 * @bat_priv: the bat priv with all the soft interface information 1052 * @bat_priv: the bat priv with all the soft interface information
1049 * @skb: data skb to forward 1053 * @skb: data skb to forward
@@ -1236,7 +1240,7 @@ out:
1236} 1240}
1237 1241
1238/** 1242/**
1239 * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst. 1243 * batadv_nc_skb_coding_possible() - true if a decoded skb is available at dst.
1240 * @skb: data skb to forward 1244 * @skb: data skb to forward
1241 * @dst: destination mac address of the other skb to code with 1245 * @dst: destination mac address of the other skb to code with
1242 * @src: source mac address of skb 1246 * @src: source mac address of skb
@@ -1260,7 +1264,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
1260} 1264}
1261 1265
1262/** 1266/**
1263 * batadv_nc_path_search - Find the coding path matching in_nc_node and 1267 * batadv_nc_path_search() - Find the coding path matching in_nc_node and
1264 * out_nc_node to retrieve a buffered packet that can be used for coding. 1268 * out_nc_node to retrieve a buffered packet that can be used for coding.
1265 * @bat_priv: the bat priv with all the soft interface information 1269 * @bat_priv: the bat priv with all the soft interface information
1266 * @in_nc_node: pointer to skb next hop's neighbor nc node 1270 * @in_nc_node: pointer to skb next hop's neighbor nc node
@@ -1328,8 +1332,8 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
1328} 1332}
1329 1333
1330/** 1334/**
1331 * batadv_nc_skb_src_search - Loops through the list of neighboring nodes of the 1335 * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of
1332 * skb's sender (may be equal to the originator). 1336 * the skb's sender (may be equal to the originator).
1333 * @bat_priv: the bat priv with all the soft interface information 1337 * @bat_priv: the bat priv with all the soft interface information
1334 * @skb: data skb to forward 1338 * @skb: data skb to forward
1335 * @eth_dst: next hop mac address of skb 1339 * @eth_dst: next hop mac address of skb
@@ -1374,7 +1378,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
1374} 1378}
1375 1379
1376/** 1380/**
1377 * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the 1381 * batadv_nc_skb_store_before_coding() - set the ethernet src and dst of the
1378 * unicast skb before it is stored for use in later decoding 1382 * unicast skb before it is stored for use in later decoding
1379 * @bat_priv: the bat priv with all the soft interface information 1383 * @bat_priv: the bat priv with all the soft interface information
1380 * @skb: data skb to store 1384 * @skb: data skb to store
@@ -1409,7 +1413,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1409} 1413}
1410 1414
1411/** 1415/**
1412 * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst. 1416 * batadv_nc_skb_dst_search() - Loops through list of neighboring nodes to dst.
1413 * @skb: data skb to forward 1417 * @skb: data skb to forward
1414 * @neigh_node: next hop to forward packet to 1418 * @neigh_node: next hop to forward packet to
1415 * @ethhdr: pointer to the ethernet header inside the skb 1419 * @ethhdr: pointer to the ethernet header inside the skb
@@ -1467,7 +1471,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1467} 1471}
1468 1472
1469/** 1473/**
1470 * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding 1474 * batadv_nc_skb_add_to_path() - buffer skb for later encoding / decoding
1471 * @skb: skb to add to path 1475 * @skb: skb to add to path
1472 * @nc_path: path to add skb to 1476 * @nc_path: path to add skb to
1473 * @neigh_node: next hop to forward packet to 1477 * @neigh_node: next hop to forward packet to
@@ -1502,7 +1506,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1502} 1506}
1503 1507
1504/** 1508/**
1505 * batadv_nc_skb_forward - try to code a packet or add it to the coding packet 1509 * batadv_nc_skb_forward() - try to code a packet or add it to the coding packet
1506 * buffer 1510 * buffer
1507 * @skb: data skb to forward 1511 * @skb: data skb to forward
1508 * @neigh_node: next hop to forward packet to 1512 * @neigh_node: next hop to forward packet to
@@ -1559,8 +1563,8 @@ out:
1559} 1563}
1560 1564
1561/** 1565/**
1562 * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used 1566 * batadv_nc_skb_store_for_decoding() - save a clone of the skb which can be
1563 * when decoding coded packets 1567 * used when decoding coded packets
1564 * @bat_priv: the bat priv with all the soft interface information 1568 * @bat_priv: the bat priv with all the soft interface information
1565 * @skb: data skb to store 1569 * @skb: data skb to store
1566 */ 1570 */
@@ -1620,7 +1624,7 @@ out:
1620} 1624}
1621 1625
1622/** 1626/**
1623 * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet 1627 * batadv_nc_skb_store_sniffed_unicast() - check if a received unicast packet
1624 * should be saved in the decoding buffer and, if so, store it there 1628 * should be saved in the decoding buffer and, if so, store it there
1625 * @bat_priv: the bat priv with all the soft interface information 1629 * @bat_priv: the bat priv with all the soft interface information
1626 * @skb: unicast skb to store 1630 * @skb: unicast skb to store
@@ -1640,7 +1644,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
1640} 1644}
1641 1645
1642/** 1646/**
1643 * batadv_nc_skb_decode_packet - decode given skb using the decode data stored 1647 * batadv_nc_skb_decode_packet() - decode given skb using the decode data stored
1644 * in nc_packet 1648 * in nc_packet
1645 * @bat_priv: the bat priv with all the soft interface information 1649 * @bat_priv: the bat priv with all the soft interface information
1646 * @skb: unicast skb to decode 1650 * @skb: unicast skb to decode
@@ -1734,7 +1738,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1734} 1738}
1735 1739
1736/** 1740/**
1737 * batadv_nc_find_decoding_packet - search through buffered decoding data to 1741 * batadv_nc_find_decoding_packet() - search through buffered decoding data to
1738 * find the data needed to decode the coded packet 1742 * find the data needed to decode the coded packet
1739 * @bat_priv: the bat priv with all the soft interface information 1743 * @bat_priv: the bat priv with all the soft interface information
1740 * @ethhdr: pointer to the ethernet header inside the coded packet 1744 * @ethhdr: pointer to the ethernet header inside the coded packet
@@ -1799,7 +1803,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
1799} 1803}
1800 1804
1801/** 1805/**
1802 * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the 1806 * batadv_nc_recv_coded_packet() - try to decode coded packet and enqueue the
1803 * resulting unicast packet 1807 * resulting unicast packet
1804 * @skb: incoming coded packet 1808 * @skb: incoming coded packet
1805 * @recv_if: pointer to interface this packet was received on 1809 * @recv_if: pointer to interface this packet was received on
@@ -1874,7 +1878,7 @@ free_skb:
1874} 1878}
1875 1879
1876/** 1880/**
1877 * batadv_nc_mesh_free - clean up network coding memory 1881 * batadv_nc_mesh_free() - clean up network coding memory
1878 * @bat_priv: the bat priv with all the soft interface information 1882 * @bat_priv: the bat priv with all the soft interface information
1879 */ 1883 */
1880void batadv_nc_mesh_free(struct batadv_priv *bat_priv) 1884void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
@@ -1891,7 +1895,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
1891 1895
1892#ifdef CONFIG_BATMAN_ADV_DEBUGFS 1896#ifdef CONFIG_BATMAN_ADV_DEBUGFS
1893/** 1897/**
1894 * batadv_nc_nodes_seq_print_text - print the nc node information 1898 * batadv_nc_nodes_seq_print_text() - print the nc node information
1895 * @seq: seq file to print on 1899 * @seq: seq file to print on
1896 * @offset: not used 1900 * @offset: not used
1897 * 1901 *
@@ -1954,7 +1958,7 @@ out:
1954} 1958}
1955 1959
1956/** 1960/**
1957 * batadv_nc_init_debugfs - create nc folder and related files in debugfs 1961 * batadv_nc_init_debugfs() - create nc folder and related files in debugfs
1958 * @bat_priv: the bat priv with all the soft interface information 1962 * @bat_priv: the bat priv with all the soft interface information
1959 * 1963 *
1960 * Return: 0 on success or negative error number in case of failure 1964 * Return: 0 on success or negative error number in case of failure
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index c66efb81d2f4..adaeafa4f71e 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen 4 * Martin Hundebøll, Jeppe Ledet-Pedersen
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 2967b86c13da..58a7d9274435 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -21,7 +22,7 @@
21#include <linux/atomic.h> 22#include <linux/atomic.h>
22#include <linux/errno.h> 23#include <linux/errno.h>
23#include <linux/etherdevice.h> 24#include <linux/etherdevice.h>
24#include <linux/fs.h> 25#include <linux/gfp.h>
25#include <linux/jiffies.h> 26#include <linux/jiffies.h>
26#include <linux/kernel.h> 27#include <linux/kernel.h>
27#include <linux/kref.h> 28#include <linux/kref.h>
@@ -30,10 +31,12 @@
30#include <linux/netdevice.h> 31#include <linux/netdevice.h>
31#include <linux/netlink.h> 32#include <linux/netlink.h>
32#include <linux/rculist.h> 33#include <linux/rculist.h>
34#include <linux/rcupdate.h>
33#include <linux/seq_file.h> 35#include <linux/seq_file.h>
34#include <linux/skbuff.h> 36#include <linux/skbuff.h>
35#include <linux/slab.h> 37#include <linux/slab.h>
36#include <linux/spinlock.h> 38#include <linux/spinlock.h>
39#include <linux/stddef.h>
37#include <linux/workqueue.h> 40#include <linux/workqueue.h>
38#include <net/sock.h> 41#include <net/sock.h>
39#include <uapi/linux/batman_adv.h> 42#include <uapi/linux/batman_adv.h>
@@ -55,10 +58,47 @@
55/* hash class keys */ 58/* hash class keys */
56static struct lock_class_key batadv_orig_hash_lock_class_key; 59static struct lock_class_key batadv_orig_hash_lock_class_key;
57 60
61/**
62 * batadv_orig_hash_find() - Find and return originator from orig_hash
63 * @bat_priv: the bat priv with all the soft interface information
64 * @data: mac address of the originator
65 *
66 * Return: orig_node (with increased refcnt), NULL on errors
67 */
68struct batadv_orig_node *
69batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
70{
71 struct batadv_hashtable *hash = bat_priv->orig_hash;
72 struct hlist_head *head;
73 struct batadv_orig_node *orig_node, *orig_node_tmp = NULL;
74 int index;
75
76 if (!hash)
77 return NULL;
78
79 index = batadv_choose_orig(data, hash->size);
80 head = &hash->table[index];
81
82 rcu_read_lock();
83 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
84 if (!batadv_compare_eth(orig_node, data))
85 continue;
86
87 if (!kref_get_unless_zero(&orig_node->refcount))
88 continue;
89
90 orig_node_tmp = orig_node;
91 break;
92 }
93 rcu_read_unlock();
94
95 return orig_node_tmp;
96}
97
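
Because the un-inlined lookup returns the node with its refcount already raised, every caller must balance it with batadv_orig_node_put(); a minimal caller sketch (function name illustrative):

    /* Sketch only: typical lookup/use/put pattern for the function
     * added above. The reference taken by batadv_orig_hash_find()
     * must always be dropped again.
     */
    static void orig_lookup_example(struct batadv_priv *bat_priv,
                                    const u8 *addr)
    {
            struct batadv_orig_node *orig_node;

            orig_node = batadv_orig_hash_find(bat_priv, addr);
            if (!orig_node)
                    return;

            /* ... read or update orig_node here ... */

            batadv_orig_node_put(orig_node);
    }
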
58static void batadv_purge_orig(struct work_struct *work); 98static void batadv_purge_orig(struct work_struct *work);
59 99
60/** 100/**
61 * batadv_compare_orig - comparing function used in the originator hash table 101 * batadv_compare_orig() - comparing function used in the originator hash table
62 * @node: node in the local table 102 * @node: node in the local table
63 * @data2: second object to compare the node to 103 * @data2: second object to compare the node to
64 * 104 *
@@ -73,7 +113,7 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2)
73} 113}
74 114
75/** 115/**
76 * batadv_orig_node_vlan_get - get an orig_node_vlan object 116 * batadv_orig_node_vlan_get() - get an orig_node_vlan object
77 * @orig_node: the originator serving the VLAN 117 * @orig_node: the originator serving the VLAN
78 * @vid: the VLAN identifier 118 * @vid: the VLAN identifier
79 * 119 *
@@ -104,7 +144,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
104} 144}
105 145
106/** 146/**
107 * batadv_orig_node_vlan_new - search and possibly create an orig_node_vlan 147 * batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan
108 * object 148 * object
109 * @orig_node: the originator serving the VLAN 149 * @orig_node: the originator serving the VLAN
110 * @vid: the VLAN identifier 150 * @vid: the VLAN identifier
@@ -145,7 +185,7 @@ out:
145} 185}
146 186
147/** 187/**
148 * batadv_orig_node_vlan_release - release originator-vlan object from lists 188 * batadv_orig_node_vlan_release() - release originator-vlan object from lists
149 * and queue for free after rcu grace period 189 * and queue for free after rcu grace period
150 * @ref: kref pointer of the originator-vlan object 190 * @ref: kref pointer of the originator-vlan object
151 */ 191 */
@@ -159,7 +199,7 @@ static void batadv_orig_node_vlan_release(struct kref *ref)
159} 199}
160 200
161/** 201/**
162 * batadv_orig_node_vlan_put - decrement the refcounter and possibly release 202 * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
163 * the originator-vlan object 203 * the originator-vlan object
164 * @orig_vlan: the originator-vlan object to release 204 * @orig_vlan: the originator-vlan object to release
165 */ 205 */
@@ -168,6 +208,12 @@ void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
168 kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); 208 kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
169} 209}
170 210
211/**
212 * batadv_originator_init() - Initialize all originator structures
213 * @bat_priv: the bat priv with all the soft interface information
214 *
215 * Return: 0 on success or negative error number in case of failure
216 */
171int batadv_originator_init(struct batadv_priv *bat_priv) 217int batadv_originator_init(struct batadv_priv *bat_priv)
172{ 218{
173 if (bat_priv->orig_hash) 219 if (bat_priv->orig_hash)
@@ -193,7 +239,7 @@ err:
193} 239}
194 240
195/** 241/**
196 * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for 242 * batadv_neigh_ifinfo_release() - release neigh_ifinfo from lists and queue for
197 * free after rcu grace period 243 * free after rcu grace period
198 * @ref: kref pointer of the neigh_ifinfo 244 * @ref: kref pointer of the neigh_ifinfo
199 */ 245 */
@@ -210,7 +256,7 @@ static void batadv_neigh_ifinfo_release(struct kref *ref)
210} 256}
211 257
212/** 258/**
213 * batadv_neigh_ifinfo_put - decrement the refcounter and possibly release 259 * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
214 * the neigh_ifinfo 260 * the neigh_ifinfo
215 * @neigh_ifinfo: the neigh_ifinfo object to release 261 * @neigh_ifinfo: the neigh_ifinfo object to release
216 */ 262 */
@@ -220,7 +266,7 @@ void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
220} 266}
221 267
222/** 268/**
223 * batadv_hardif_neigh_release - release hardif neigh node from lists and 269 * batadv_hardif_neigh_release() - release hardif neigh node from lists and
224 * queue for free after rcu grace period 270 * queue for free after rcu grace period
225 * @ref: kref pointer of the neigh_node 271 * @ref: kref pointer of the neigh_node
226 */ 272 */
@@ -240,7 +286,7 @@ static void batadv_hardif_neigh_release(struct kref *ref)
240} 286}
241 287
242/** 288/**
243 * batadv_hardif_neigh_put - decrement the hardif neighbor's refcounter 289 * batadv_hardif_neigh_put() - decrement the hardif neighbor's refcounter
244 * and possibly release it 290 * and possibly release it
245 * @hardif_neigh: hardif neigh neighbor to free 291 * @hardif_neigh: hardif neigh neighbor to free
246 */ 292 */
@@ -250,7 +296,7 @@ void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
250} 296}
251 297
252/** 298/**
253 * batadv_neigh_node_release - release neigh_node from lists and queue for 299 * batadv_neigh_node_release() - release neigh_node from lists and queue for
254 * free after rcu grace period 300 * free after rcu grace period
255 * @ref: kref pointer of the neigh_node 301 * @ref: kref pointer of the neigh_node
256 */ 302 */
@@ -275,7 +321,7 @@ static void batadv_neigh_node_release(struct kref *ref)
275} 321}
276 322
277/** 323/**
278 * batadv_neigh_node_put - decrement the neighbor's refcounter and possibly 324 * batadv_neigh_node_put() - decrement the neighbor's refcounter and possibly
279 * release it 325 * release it
280 * @neigh_node: neigh neighbor to free 326 * @neigh_node: neigh neighbor to free
281 */ 327 */
@@ -285,7 +331,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
285} 331}
286 332
287/** 333/**
288 * batadv_orig_router_get - router to the originator depending on iface 334 * batadv_orig_router_get() - router to the originator depending on iface
289 * @orig_node: the orig node for the router 335 * @orig_node: the orig node for the router
290 * @if_outgoing: the interface where the payload packet has been received or 336 * @if_outgoing: the interface where the payload packet has been received or
291 * the OGM should be sent to 337 * the OGM should be sent to
@@ -318,7 +364,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
318} 364}
319 365
320/** 366/**
321 * batadv_orig_ifinfo_get - find the ifinfo from an orig_node 367 * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node
322 * @orig_node: the orig node to be queried 368 * @orig_node: the orig node to be queried
323 * @if_outgoing: the interface for which the ifinfo should be acquired 369 * @if_outgoing: the interface for which the ifinfo should be acquired
324 * 370 *
@@ -350,7 +396,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
350} 396}
351 397
352/** 398/**
353 * batadv_orig_ifinfo_new - search and possibly create an orig_ifinfo object 399 * batadv_orig_ifinfo_new() - search and possibly create an orig_ifinfo object
354 * @orig_node: the orig node to be queried 400 * @orig_node: the orig node to be queried
355 * @if_outgoing: the interface for which the ifinfo should be acquired 401 * @if_outgoing: the interface for which the ifinfo should be acquired
356 * 402 *
@@ -396,7 +442,7 @@ out:
396} 442}
397 443
398/** 444/**
399 * batadv_neigh_ifinfo_get - find the ifinfo from a neigh_node 445 * batadv_neigh_ifinfo_get() - find the ifinfo from a neigh_node
400 * @neigh: the neigh node to be queried 446 * @neigh: the neigh node to be queried
401 * @if_outgoing: the interface for which the ifinfo should be acquired 447 * @if_outgoing: the interface for which the ifinfo should be acquired
402 * 448 *
@@ -429,7 +475,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
429} 475}
430 476
431/** 477/**
432 * batadv_neigh_ifinfo_new - search and possibly create a neigh_ifinfo object 478 * batadv_neigh_ifinfo_new() - search and possibly create a neigh_ifinfo object
433 * @neigh: the neigh node to be queried 479 * @neigh: the neigh node to be queried
434 * @if_outgoing: the interface for which the ifinfo should be acquired 480 * @if_outgoing: the interface for which the ifinfo should be acquired
435 * 481 *
@@ -472,7 +518,7 @@ out:
472} 518}
473 519
474/** 520/**
475 * batadv_neigh_node_get - retrieve a neighbour from the list 521 * batadv_neigh_node_get() - retrieve a neighbour from the list
476 * @orig_node: originator which the neighbour belongs to 522 * @orig_node: originator which the neighbour belongs to
477 * @hard_iface: the interface where this neighbour is connected to 523 * @hard_iface: the interface where this neighbour is connected to
478 * @addr: the address of the neighbour 524 * @addr: the address of the neighbour
@@ -509,7 +555,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
509} 555}
510 556
511/** 557/**
512 * batadv_hardif_neigh_create - create a hardif neighbour node 558 * batadv_hardif_neigh_create() - create a hardif neighbour node
513 * @hard_iface: the interface this neighbour is connected to 559 * @hard_iface: the interface this neighbour is connected to
514 * @neigh_addr: the interface address of the neighbour to retrieve 560 * @neigh_addr: the interface address of the neighbour to retrieve
515 * @orig_node: originator object representing the neighbour 561 * @orig_node: originator object representing the neighbour
@@ -555,7 +601,7 @@ out:
555} 601}
556 602
557/** 603/**
558 * batadv_hardif_neigh_get_or_create - retrieve or create a hardif neighbour 604 * batadv_hardif_neigh_get_or_create() - retrieve or create a hardif neighbour
559 * node 605 * node
560 * @hard_iface: the interface this neighbour is connected to 606 * @hard_iface: the interface this neighbour is connected to
561 * @neigh_addr: the interface address of the neighbour to retrieve 607 * @neigh_addr: the interface address of the neighbour to retrieve
@@ -579,7 +625,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
579} 625}
580 626
581/** 627/**
582 * batadv_hardif_neigh_get - retrieve a hardif neighbour from the list 628 * batadv_hardif_neigh_get() - retrieve a hardif neighbour from the list
583 * @hard_iface: the interface where this neighbour is connected to 629 * @hard_iface: the interface where this neighbour is connected to
584 * @neigh_addr: the address of the neighbour 630 * @neigh_addr: the address of the neighbour
585 * 631 *
@@ -611,7 +657,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
611} 657}
612 658
613/** 659/**
614 * batadv_neigh_node_create - create a neigh node object 660 * batadv_neigh_node_create() - create a neigh node object
615 * @orig_node: originator object representing the neighbour 661 * @orig_node: originator object representing the neighbour
616 * @hard_iface: the interface where the neighbour is connected to 662 * @hard_iface: the interface where the neighbour is connected to
617 * @neigh_addr: the mac address of the neighbour interface 663 * @neigh_addr: the mac address of the neighbour interface
@@ -676,7 +722,7 @@ out:
676} 722}
677 723
678/** 724/**
679 * batadv_neigh_node_get_or_create - retrieve or create a neigh node object 725 * batadv_neigh_node_get_or_create() - retrieve or create a neigh node object
680 * @orig_node: originator object representing the neighbour 726 * @orig_node: originator object representing the neighbour
681 * @hard_iface: the interface where the neighbour is connected to 727 * @hard_iface: the interface where the neighbour is connected to
682 * @neigh_addr: the mac address of the neighbour interface 728 * @neigh_addr: the mac address of the neighbour interface
@@ -700,7 +746,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
700 746
701#ifdef CONFIG_BATMAN_ADV_DEBUGFS 747#ifdef CONFIG_BATMAN_ADV_DEBUGFS
702/** 748/**
703 * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list 749 * batadv_hardif_neigh_seq_print_text() - print the single hop neighbour list
704 * @seq: neighbour table seq_file struct 750 * @seq: neighbour table seq_file struct
705 * @offset: not used 751 * @offset: not used
706 * 752 *
@@ -735,8 +781,8 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
735#endif 781#endif
736 782
737/** 783/**
738 * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific 784 * batadv_hardif_neigh_dump() - Dump to netlink the neighbor infos for a
739 * outgoing interface 785 * specific outgoing interface
740 * @msg: message to dump into 786 * @msg: message to dump into
741 * @cb: parameters for the dump 787 * @cb: parameters for the dump
742 * 788 *
@@ -812,7 +858,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
812} 858}
813 859
814/** 860/**
815 * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for 861 * batadv_orig_ifinfo_release() - release orig_ifinfo from lists and queue for
816 * free after rcu grace period 862 * free after rcu grace period
817 * @ref: kref pointer of the orig_ifinfo 863 * @ref: kref pointer of the orig_ifinfo
818 */ 864 */
@@ -835,7 +881,7 @@ static void batadv_orig_ifinfo_release(struct kref *ref)
835} 881}
836 882
837/** 883/**
838 * batadv_orig_ifinfo_put - decrement the refcounter and possibly release 884 * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
839 * the orig_ifinfo 885 * the orig_ifinfo
840 * @orig_ifinfo: the orig_ifinfo object to release 886 * @orig_ifinfo: the orig_ifinfo object to release
841 */ 887 */
@@ -845,7 +891,7 @@ void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
845} 891}
846 892
847/** 893/**
848 * batadv_orig_node_free_rcu - free the orig_node 894 * batadv_orig_node_free_rcu() - free the orig_node
849 * @rcu: rcu pointer of the orig_node 895 * @rcu: rcu pointer of the orig_node
850 */ 896 */
851static void batadv_orig_node_free_rcu(struct rcu_head *rcu) 897static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
@@ -866,7 +912,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
866} 912}
867 913
868/** 914/**
869 * batadv_orig_node_release - release orig_node from lists and queue for 915 * batadv_orig_node_release() - release orig_node from lists and queue for
870 * free after rcu grace period 916 * free after rcu grace period
871 * @ref: kref pointer of the orig_node 917 * @ref: kref pointer of the orig_node
872 */ 918 */
@@ -917,7 +963,7 @@ static void batadv_orig_node_release(struct kref *ref)
917} 963}
918 964
919/** 965/**
920 * batadv_orig_node_put - decrement the orig node refcounter and possibly 966 * batadv_orig_node_put() - decrement the orig node refcounter and possibly
921 * release it 967 * release it
922 * @orig_node: the orig node to free 968 * @orig_node: the orig node to free
923 */ 969 */
@@ -926,6 +972,10 @@ void batadv_orig_node_put(struct batadv_orig_node *orig_node)
926 kref_put(&orig_node->refcount, batadv_orig_node_release); 972 kref_put(&orig_node->refcount, batadv_orig_node_release);
927} 973}
928 974
975/**
976 * batadv_originator_free() - Free all originator structures
977 * @bat_priv: the bat priv with all the soft interface information
978 */
929void batadv_originator_free(struct batadv_priv *bat_priv) 979void batadv_originator_free(struct batadv_priv *bat_priv)
930{ 980{
931 struct batadv_hashtable *hash = bat_priv->orig_hash; 981 struct batadv_hashtable *hash = bat_priv->orig_hash;
@@ -959,7 +1009,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
959} 1009}
960 1010
961/** 1011/**
962 * batadv_orig_node_new - creates a new orig_node 1012 * batadv_orig_node_new() - creates a new orig_node
963 * @bat_priv: the bat priv with all the soft interface information 1013 * @bat_priv: the bat priv with all the soft interface information
964 * @addr: the mac address of the originator 1014 * @addr: the mac address of the originator
965 * 1015 *
@@ -1038,7 +1088,7 @@ free_orig_node:
1038} 1088}
1039 1089
1040/** 1090/**
1041 * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor 1091 * batadv_purge_neigh_ifinfo() - purge obsolete ifinfo entries from neighbor
1042 * @bat_priv: the bat priv with all the soft interface information 1092 * @bat_priv: the bat priv with all the soft interface information
1043 * @neigh: orig node which is to be checked 1093 * @neigh: orig node which is to be checked
1044 */ 1094 */
@@ -1079,7 +1129,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
1079} 1129}
1080 1130
1081/** 1131/**
1082 * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator 1132 * batadv_purge_orig_ifinfo() - purge obsolete ifinfo entries from originator
1083 * @bat_priv: the bat priv with all the soft interface information 1133 * @bat_priv: the bat priv with all the soft interface information
1084 * @orig_node: orig node which is to be checked 1134 * @orig_node: orig node which is to be checked
1085 * 1135 *
@@ -1131,7 +1181,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
1131} 1181}
1132 1182
1133/** 1183/**
1134 * batadv_purge_orig_neighbors - purges neighbors from originator 1184 * batadv_purge_orig_neighbors() - purges neighbors from originator
1135 * @bat_priv: the bat priv with all the soft interface information 1185 * @bat_priv: the bat priv with all the soft interface information
1136 * @orig_node: orig node which is to be checked 1186 * @orig_node: orig node which is to be checked
1137 * 1187 *
@@ -1189,7 +1239,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
1189} 1239}
1190 1240
1191/** 1241/**
1192 * batadv_find_best_neighbor - finds the best neighbor after purging 1242 * batadv_find_best_neighbor() - finds the best neighbor after purging
1193 * @bat_priv: the bat priv with all the soft interface information 1243 * @bat_priv: the bat priv with all the soft interface information
1194 * @orig_node: orig node which is to be checked 1244 * @orig_node: orig node which is to be checked
1195 * @if_outgoing: the interface for which the metric should be compared 1245 * @if_outgoing: the interface for which the metric should be compared
@@ -1224,7 +1274,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
1224} 1274}
1225 1275
1226/** 1276/**
1227 * batadv_purge_orig_node - purges obsolete information from an orig_node 1277 * batadv_purge_orig_node() - purges obsolete information from an orig_node
1228 * @bat_priv: the bat priv with all the soft interface information 1278 * @bat_priv: the bat priv with all the soft interface information
1229 * @orig_node: orig node which is to be checked 1279 * @orig_node: orig node which is to be checked
1230 * 1280 *
@@ -1341,12 +1391,24 @@ static void batadv_purge_orig(struct work_struct *work)
1341 msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); 1391 msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD));
1342} 1392}
1343 1393
1394/**
1395 * batadv_purge_orig_ref() - Purge all outdated originators
1396 * @bat_priv: the bat priv with all the soft interface information
1397 */
1344void batadv_purge_orig_ref(struct batadv_priv *bat_priv) 1398void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
1345{ 1399{
1346 _batadv_purge_orig(bat_priv); 1400 _batadv_purge_orig(bat_priv);
1347} 1401}
1348 1402
1349#ifdef CONFIG_BATMAN_ADV_DEBUGFS 1403#ifdef CONFIG_BATMAN_ADV_DEBUGFS
1404
1405/**
1406 * batadv_orig_seq_print_text() - Print the originator table in a seq file
1407 * @seq: seq file to print on
1408 * @offset: not used
1409 *
1410 * Return: always 0
1411 */
1350int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) 1412int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
1351{ 1413{
1352 struct net_device *net_dev = (struct net_device *)seq->private; 1414 struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1376,7 +1438,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
1376} 1438}
1377 1439
1378/** 1440/**
1379 * batadv_orig_hardif_seq_print_text - writes originator infos for a specific 1441 * batadv_orig_hardif_seq_print_text() - writes originator infos for a specific
1380 * outgoing interface 1442 * outgoing interface
1381 * @seq: debugfs table seq_file struct 1443 * @seq: debugfs table seq_file struct
1382 * @offset: not used 1444 * @offset: not used
@@ -1423,7 +1485,7 @@ out:
1423#endif 1485#endif
1424 1486
1425/** 1487/**
1426 * batadv_orig_dump - Dump to netlink the originator infos for a specific 1488 * batadv_orig_dump() - Dump to netlink the originator infos for a specific
1427 * outgoing interface 1489 * outgoing interface
1428 * @msg: message to dump into 1490 * @msg: message to dump into
1429 * @cb: parameters for the dump 1491 * @cb: parameters for the dump
@@ -1499,6 +1561,13 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
1499 return ret; 1561 return ret;
1500} 1562}
1501 1563
1564/**
1565 * batadv_orig_hash_add_if() - Add interface to originators in orig_hash
1566 * @hard_iface: hard interface to add (already slave of the soft interface)
1567 * @max_if_num: new number of interfaces
1568 *
1569 * Return: 0 on success or negative error number in case of failure
1570 */
1502int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, 1571int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
1503 int max_if_num) 1572 int max_if_num)
1504{ 1573{
@@ -1534,6 +1603,13 @@ err:
1534 return -ENOMEM; 1603 return -ENOMEM;
1535} 1604}
1536 1605
1606/**
1607 * batadv_orig_hash_del_if() - Remove interface from originators in orig_hash
1608 * @hard_iface: hard interface to remove (still slave of the soft interface)
1609 * @max_if_num: new number of interfaces
1610 *
1611 * Return: 0 on success or negative error number in case of failure
1612 */
1537int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, 1613int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
1538 int max_if_num) 1614 int max_if_num)
1539{ 1615{
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index d94220a6d21a..8e543a3cdc6c 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -23,14 +24,8 @@
23#include <linux/compiler.h> 24#include <linux/compiler.h>
24#include <linux/if_ether.h> 25#include <linux/if_ether.h>
25#include <linux/jhash.h> 26#include <linux/jhash.h>
26#include <linux/kref.h>
27#include <linux/rculist.h>
28#include <linux/rcupdate.h>
29#include <linux/stddef.h>
30#include <linux/types.h> 27#include <linux/types.h>
31 28
32#include "hash.h"
33
34struct netlink_callback; 29struct netlink_callback;
35struct seq_file; 30struct seq_file;
36struct sk_buff; 31struct sk_buff;
@@ -89,8 +84,13 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
89 unsigned short vid); 84 unsigned short vid);
90void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan); 85void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
91 86
92/* hashfunction to choose an entry in a hash table of given size 87/**
93 * hash algorithm from http://en.wikipedia.org/wiki/Hash_table 88 * batadv_choose_orig() - Return the index of the orig entry in the hash table
89 * @data: mac address of the originator node
90 * @size: the size of the hash table
91 *
92 * Return: the hash index where the object represented by @data should be
93 * stored.
94 */ 94 */
95static inline u32 batadv_choose_orig(const void *data, u32 size) 95static inline u32 batadv_choose_orig(const void *data, u32 size)
96{ 96{
@@ -100,34 +100,7 @@ static inline u32 batadv_choose_orig(const void *data, u32 size)
100 return hash % size; 100 return hash % size;
101} 101}
102 102
103static inline struct batadv_orig_node * 103struct batadv_orig_node *
104batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) 104batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data);
105{
106 struct batadv_hashtable *hash = bat_priv->orig_hash;
107 struct hlist_head *head;
108 struct batadv_orig_node *orig_node, *orig_node_tmp = NULL;
109 int index;
110
111 if (!hash)
112 return NULL;
113
114 index = batadv_choose_orig(data, hash->size);
115 head = &hash->table[index];
116
117 rcu_read_lock();
118 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
119 if (!batadv_compare_eth(orig_node, data))
120 continue;
121
122 if (!kref_get_unless_zero(&orig_node->refcount))
123 continue;
124
125 orig_node_tmp = orig_node;
126 break;
127 }
128 rcu_read_unlock();
129
130 return orig_node_tmp;
131}
132 105
133#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ 106#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
deleted file mode 100644
index 8e8a5db197cb..000000000000
--- a/net/batman-adv/packet.h
+++ /dev/null
@@ -1,621 +0,0 @@
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 *
3 * Marek Lindner, Simon Wunderlich
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef _NET_BATMAN_ADV_PACKET_H_
19#define _NET_BATMAN_ADV_PACKET_H_
20
21#include <asm/byteorder.h>
22#include <linux/types.h>
23
24#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0)
25
26/**
27 * enum batadv_packettype - types for batman-adv encapsulated packets
28 * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
29 * @BATADV_BCAST: broadcast packets carrying broadcast payload
30 * @BATADV_CODED: network coded packets
31 * @BATADV_ELP: echo location packets for B.A.T.M.A.N. V
32 * @BATADV_OGM2: originator messages for B.A.T.M.A.N. V
33 *
34 * @BATADV_UNICAST: unicast packets carrying unicast payload traffic
35 * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original
36 * payload packet
37 * @BATADV_UNICAST_4ADDR: unicast packet including the originator address of
38 * the sender
39 * @BATADV_ICMP: unicast packet like IP ICMP used for ping or traceroute
40 * @BATADV_UNICAST_TVLV: unicast packet carrying TVLV containers
41 */
42enum batadv_packettype {
43 /* 0x00 - 0x3f: local packets or special rules for handling */
44 BATADV_IV_OGM = 0x00,
45 BATADV_BCAST = 0x01,
46 BATADV_CODED = 0x02,
47 BATADV_ELP = 0x03,
48 BATADV_OGM2 = 0x04,
49 /* 0x40 - 0x7f: unicast */
50#define BATADV_UNICAST_MIN 0x40
51 BATADV_UNICAST = 0x40,
52 BATADV_UNICAST_FRAG = 0x41,
53 BATADV_UNICAST_4ADDR = 0x42,
54 BATADV_ICMP = 0x43,
55 BATADV_UNICAST_TVLV = 0x44,
56#define BATADV_UNICAST_MAX 0x7f
57 /* 0x80 - 0xff: reserved */
58};
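
The enum above splits the 8-bit type space into three ranges: 0x00 - 0x3f for local/special handling, 0x40 - 0x7f for unicast, 0x80 - 0xff reserved. A sketch of how the BATADV_UNICAST_MIN/MAX bounds classify a received type (the helper name is invented for illustration; kernel context assumed):

	/* hypothetical helper, not part of this header */
	static bool batadv_packet_type_is_unicast(u8 packet_type)
	{
		return packet_type >= BATADV_UNICAST_MIN &&
		       packet_type <= BATADV_UNICAST_MAX;
	}
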
59
60/**
61 * enum batadv_subtype - packet subtype for unicast4addr
62 * @BATADV_P_DATA: user payload
63 * @BATADV_P_DAT_DHT_GET: DHT request message
64 * @BATADV_P_DAT_DHT_PUT: DHT store message
65 * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT
66 */
67enum batadv_subtype {
68 BATADV_P_DATA = 0x01,
69 BATADV_P_DAT_DHT_GET = 0x02,
70 BATADV_P_DAT_DHT_PUT = 0x03,
71 BATADV_P_DAT_CACHE_REPLY = 0x04,
72};
73
74/* this file is included by batctl which needs these defines */
75#define BATADV_COMPAT_VERSION 15
76
77/**
78 * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets
 79 * @BATADV_NOT_BEST_NEXT_HOP: flag is set when an ogm packet is forwarded and was
 80 * previously received from someone other than the best neighbor.
 81 * @BATADV_PRIMARIES_FIRST_HOP: flag unused.
 82 * @BATADV_DIRECTLINK: flag is set for the first hop or if rebroadcast from a
 83 * one-hop neighbor on the interface where it was originally received.
84 */
85enum batadv_iv_flags {
86 BATADV_NOT_BEST_NEXT_HOP = BIT(0),
87 BATADV_PRIMARIES_FIRST_HOP = BIT(1),
88 BATADV_DIRECTLINK = BIT(2),
89};
90
91/* ICMP message types */
92enum batadv_icmp_packettype {
93 BATADV_ECHO_REPLY = 0,
94 BATADV_DESTINATION_UNREACHABLE = 3,
95 BATADV_ECHO_REQUEST = 8,
96 BATADV_TTL_EXCEEDED = 11,
97 BATADV_PARAMETER_PROBLEM = 12,
98 BATADV_TP = 15,
99};
100
101/**
102 * enum batadv_mcast_flags - flags for multicast capabilities and settings
103 * @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for
104 * 224.0.0.0/24 or ff02::1
105 * @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets
106 * @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets
107 */
108enum batadv_mcast_flags {
109 BATADV_MCAST_WANT_ALL_UNSNOOPABLES = BIT(0),
110 BATADV_MCAST_WANT_ALL_IPV4 = BIT(1),
111 BATADV_MCAST_WANT_ALL_IPV6 = BIT(2),
112};
113
114/* tt data subtypes */
115#define BATADV_TT_DATA_TYPE_MASK 0x0F
116
117/**
118 * enum batadv_tt_data_flags - flags for tt data tvlv
119 * @BATADV_TT_OGM_DIFF: TT diff propagated through OGM
120 * @BATADV_TT_REQUEST: TT request message
121 * @BATADV_TT_RESPONSE: TT response message
122 * @BATADV_TT_FULL_TABLE: contains full table to replace existing table
123 */
124enum batadv_tt_data_flags {
125 BATADV_TT_OGM_DIFF = BIT(0),
126 BATADV_TT_REQUEST = BIT(1),
127 BATADV_TT_RESPONSE = BIT(2),
128 BATADV_TT_FULL_TABLE = BIT(4),
129};
130
131/**
132 * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
133 * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
134 */
135enum batadv_vlan_flags {
136 BATADV_VLAN_HAS_TAG = BIT(15),
137};
138
139/* claim frame types for the bridge loop avoidance */
140enum batadv_bla_claimframe {
141 BATADV_CLAIM_TYPE_CLAIM = 0x00,
142 BATADV_CLAIM_TYPE_UNCLAIM = 0x01,
143 BATADV_CLAIM_TYPE_ANNOUNCE = 0x02,
144 BATADV_CLAIM_TYPE_REQUEST = 0x03,
145 BATADV_CLAIM_TYPE_LOOPDETECT = 0x04,
146};
147
148/**
149 * enum batadv_tvlv_type - tvlv type definitions
150 * @BATADV_TVLV_GW: gateway tvlv
151 * @BATADV_TVLV_DAT: distributed arp table tvlv
152 * @BATADV_TVLV_NC: network coding tvlv
153 * @BATADV_TVLV_TT: translation table tvlv
154 * @BATADV_TVLV_ROAM: roaming advertisement tvlv
155 * @BATADV_TVLV_MCAST: multicast capability tvlv
156 */
157enum batadv_tvlv_type {
158 BATADV_TVLV_GW = 0x01,
159 BATADV_TVLV_DAT = 0x02,
160 BATADV_TVLV_NC = 0x03,
161 BATADV_TVLV_TT = 0x04,
162 BATADV_TVLV_ROAM = 0x05,
163 BATADV_TVLV_MCAST = 0x06,
164};
165
166#pragma pack(2)
167/* the destination hardware field in the ARP frame is used to
168 * transport the claim type and the group id
169 */
170struct batadv_bla_claim_dst {
171 u8 magic[3]; /* FF:43:05 */
172 u8 type; /* bla_claimframe */
173 __be16 group; /* group id */
174};
175
176#pragma pack()
177
178/**
179 * struct batadv_ogm_packet - ogm (routing protocol) packet
180 * @packet_type: batman-adv packet type, part of the general header
 181 * @version: batman-adv protocol version, part of the general header
 182 * @ttl: time to live for this packet, part of the general header
183 * @flags: contains routing relevant flags - see enum batadv_iv_flags
184 * @seqno: sequence identification
185 * @orig: address of the source node
186 * @prev_sender: address of the previous sender
187 * @reserved: reserved byte for alignment
188 * @tq: transmission quality
189 * @tvlv_len: length of tvlv data following the ogm header
190 */
191struct batadv_ogm_packet {
192 u8 packet_type;
193 u8 version;
194 u8 ttl;
195 u8 flags;
196 __be32 seqno;
197 u8 orig[ETH_ALEN];
198 u8 prev_sender[ETH_ALEN];
199 u8 reserved;
200 u8 tq;
201 __be16 tvlv_len;
202 /* __packed is not needed as the struct size is divisible by 4,
203 * and the largest data type in this struct has a size of 4.
204 */
205};
206
207#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
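
Because tvlv_len is stored big endian, a receiver finds the TVLV data appended behind the fixed OGM header roughly as follows (a sketch assuming skb->data starts at the OGM header; length validation elided):

	struct batadv_ogm_packet *ogm;
	u8 *tvlv_buf;
	u16 tvlv_len;

	ogm = (struct batadv_ogm_packet *)skb->data;
	tvlv_buf = skb->data + BATADV_OGM_HLEN; /* TVLVs follow the header */
	tvlv_len = ntohs(ogm->tvlv_len);        /* convert from network order */
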
208
209/**
210 * struct batadv_ogm2_packet - ogm2 (routing protocol) packet
211 * @packet_type: batman-adv packet type, part of the general header
212 * @version: batman-adv protocol version, part of the general header
213 * @ttl: time to live for this packet, part of the general header
 214 * @flags: reserved for routing relevant flags - currently always 0
215 * @seqno: sequence number
216 * @orig: originator mac address
217 * @tvlv_len: length of the appended tvlv buffer (in bytes)
218 * @throughput: the currently flooded path throughput
219 */
220struct batadv_ogm2_packet {
221 u8 packet_type;
222 u8 version;
223 u8 ttl;
224 u8 flags;
225 __be32 seqno;
226 u8 orig[ETH_ALEN];
227 __be16 tvlv_len;
228 __be32 throughput;
229 /* __packed is not needed as the struct size is divisible by 4,
230 * and the largest data type in this struct has a size of 4.
231 */
232};
233
234#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
235
236/**
237 * struct batadv_elp_packet - elp (neighbor discovery) packet
238 * @packet_type: batman-adv packet type, part of the general header
 239 * @version: batman-adv protocol version, part of the general header
240 * @orig: originator mac address
241 * @seqno: sequence number
242 * @elp_interval: currently used ELP sending interval in ms
243 */
244struct batadv_elp_packet {
245 u8 packet_type;
246 u8 version;
247 u8 orig[ETH_ALEN];
248 __be32 seqno;
249 __be32 elp_interval;
250};
251
252#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
253
254/**
255 * struct batadv_icmp_header - common members among all the ICMP packets
256 * @packet_type: batman-adv packet type, part of the general header
 257 * @version: batman-adv protocol version, part of the general header
 258 * @ttl: time to live for this packet, part of the general header
259 * @msg_type: ICMP packet type
260 * @dst: address of the destination node
261 * @orig: address of the source node
262 * @uid: local ICMP socket identifier
263 * @align: not used - useful for alignment purposes only
264 *
 265 * This structure is used for ICMP packet parsing only and it is never sent
266 * over the wire. The alignment field at the end is there to ensure that
267 * members are padded the same way as they are in real packets.
268 */
269struct batadv_icmp_header {
270 u8 packet_type;
271 u8 version;
272 u8 ttl;
273 u8 msg_type; /* see ICMP message types above */
274 u8 dst[ETH_ALEN];
275 u8 orig[ETH_ALEN];
276 u8 uid;
277 u8 align[3];
278};
279
280/**
281 * struct batadv_icmp_packet - ICMP packet
282 * @packet_type: batman-adv packet type, part of the general header
 283 * @version: batman-adv protocol version, part of the general header
 284 * @ttl: time to live for this packet, part of the general header
285 * @msg_type: ICMP packet type
286 * @dst: address of the destination node
287 * @orig: address of the source node
288 * @uid: local ICMP socket identifier
289 * @reserved: not used - useful for alignment
290 * @seqno: ICMP sequence number
291 */
292struct batadv_icmp_packet {
293 u8 packet_type;
294 u8 version;
295 u8 ttl;
296 u8 msg_type; /* see ICMP message types above */
297 u8 dst[ETH_ALEN];
298 u8 orig[ETH_ALEN];
299 u8 uid;
300 u8 reserved;
301 __be16 seqno;
302};
303
304/**
305 * struct batadv_icmp_tp_packet - ICMP TP Meter packet
306 * @packet_type: batman-adv packet type, part of the general header
 307 * @version: batman-adv protocol version, part of the general header
 308 * @ttl: time to live for this packet, part of the general header
309 * @msg_type: ICMP packet type
310 * @dst: address of the destination node
311 * @orig: address of the source node
312 * @uid: local ICMP socket identifier
313 * @subtype: TP packet subtype (see batadv_icmp_tp_subtype)
314 * @session: TP session identifier
315 * @seqno: the TP sequence number
316 * @timestamp: time when the packet has been sent. This value is filled in a
317 * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the
318 * RTT. Since it is read only by the host which wrote it, there is no need to
319 * store it using network order
320 */
321struct batadv_icmp_tp_packet {
322 u8 packet_type;
323 u8 version;
324 u8 ttl;
325 u8 msg_type; /* see ICMP message types above */
326 u8 dst[ETH_ALEN];
327 u8 orig[ETH_ALEN];
328 u8 uid;
329 u8 subtype;
330 u8 session[2];
331 __be32 seqno;
332 __be32 timestamp;
333};
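
The @timestamp round trip described above, sketched from the sender's point of view (assumptions: icmp_tp points at the TP packet buffer and a jiffies-based clock is used; per the comment, the raw value is echoed back unchanged, so no byte-order conversion is applied):

	u32 rtt;

	/* on TP_MSG transmit: stamp with a sender-local clock */
	icmp_tp->timestamp = jiffies_to_msecs(jiffies);

	/* on the matching TP_ACK: the receiver echoed the value unchanged */
	rtt = jiffies_to_msecs(jiffies) - icmp_tp->timestamp;
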
334
335/**
336 * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes
337 * @BATADV_TP_MSG: Msg from sender to receiver
338 * @BATADV_TP_ACK: acknowledgment from receiver to sender
339 */
340enum batadv_icmp_tp_subtype {
341 BATADV_TP_MSG = 0,
342 BATADV_TP_ACK,
343};
344
345#define BATADV_RR_LEN 16
346
347/**
348 * struct batadv_icmp_packet_rr - ICMP RouteRecord packet
349 * @packet_type: batman-adv packet type, part of the general header
 350 * @version: batman-adv protocol version, part of the general header
 351 * @ttl: time to live for this packet, part of the general header
352 * @msg_type: ICMP packet type
353 * @dst: address of the destination node
354 * @orig: address of the source node
355 * @uid: local ICMP socket identifier
 356 * @rr_cur: number of entries in the rr array
357 * @seqno: ICMP sequence number
358 * @rr: route record array
359 */
360struct batadv_icmp_packet_rr {
361 u8 packet_type;
362 u8 version;
363 u8 ttl;
364 u8 msg_type; /* see ICMP message types above */
365 u8 dst[ETH_ALEN];
366 u8 orig[ETH_ALEN];
367 u8 uid;
368 u8 rr_cur;
369 __be16 seqno;
370 u8 rr[BATADV_RR_LEN][ETH_ALEN];
371};
372
373#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr)
374
375/* All packet headers in front of an ethernet header have to be completely
 376 * divisible by 2 but not by 4 so that the payload behind the ethernet
 377 * header is aligned to a 4-byte boundary again.
378 *
379 * A packing of 2 is necessary to avoid extra padding at the end of the struct
380 * caused by a structure member which is larger than two bytes. Otherwise
381 * the structure would not fulfill the previously mentioned rule to avoid the
382 * misalignment of the payload after the ethernet header. It may also lead to
 383 * leakage of information when the padding is not initialized before sending.
384 */
385#pragma pack(2)
386
387/**
388 * struct batadv_unicast_packet - unicast packet for network payload
389 * @packet_type: batman-adv packet type, part of the general header
 390 * @version: batman-adv protocol version, part of the general header
 391 * @ttl: time to live for this packet, part of the general header
392 * @ttvn: translation table version number
393 * @dest: originator destination of the unicast packet
394 */
395struct batadv_unicast_packet {
396 u8 packet_type;
397 u8 version;
398 u8 ttl;
399 u8 ttvn; /* destination translation table version number */
400 u8 dest[ETH_ALEN];
401 /* "4 bytes boundary + 2 bytes" long to make the payload after the
 402 * following ethernet header aligned to a 4-byte boundary again
403 */
404};
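
That size rule can be pinned down with a compile-time guard; a sketch (such a check would have to live in a function, e.g. an init routine, and is not part of this header): sizeof(struct batadv_unicast_packet) is 10 bytes, i.e. 2 past a 4-byte boundary, so the payload behind the following 14-byte ethernet header lands on a 4-byte boundary again.

	/* hypothetical guard for the "2 mod 4" header size rule */
	BUILD_BUG_ON(sizeof(struct batadv_unicast_packet) % 4 != 2);
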
405
406/**
407 * struct batadv_unicast_4addr_packet - extended unicast packet
408 * @u: common unicast packet header
409 * @src: address of the source
410 * @subtype: packet subtype
411 * @reserved: reserved byte for alignment
412 */
413struct batadv_unicast_4addr_packet {
414 struct batadv_unicast_packet u;
415 u8 src[ETH_ALEN];
416 u8 subtype;
417 u8 reserved;
418 /* "4 bytes boundary + 2 bytes" long to make the payload after the
 419 * following ethernet header aligned to a 4-byte boundary again
420 */
421};
422
423/**
424 * struct batadv_frag_packet - fragmented packet
425 * @packet_type: batman-adv packet type, part of the general header
 426 * @version: batman-adv protocol version, part of the general header
 427 * @ttl: time to live for this packet, part of the general header
428 * @dest: final destination used when routing fragments
429 * @orig: originator of the fragment used when merging the packet
430 * @no: fragment number within this sequence
431 * @priority: priority of frame, from ToS IP precedence or 802.1p
432 * @reserved: reserved byte for alignment
433 * @seqno: sequence identification
434 * @total_size: size of the merged packet
435 */
436struct batadv_frag_packet {
437 u8 packet_type;
438 u8 version; /* batman version field */
439 u8 ttl;
440#if defined(__BIG_ENDIAN_BITFIELD)
441 u8 no:4;
442 u8 priority:3;
443 u8 reserved:1;
444#elif defined(__LITTLE_ENDIAN_BITFIELD)
445 u8 reserved:1;
446 u8 priority:3;
447 u8 no:4;
448#else
449#error "unknown bitfield endianness"
450#endif
451 u8 dest[ETH_ALEN];
452 u8 orig[ETH_ALEN];
453 __be16 seqno;
454 __be16 total_size;
455};
456
457/**
458 * struct batadv_bcast_packet - broadcast packet for network payload
459 * @packet_type: batman-adv packet type, part of the general header
 460 * @version: batman-adv protocol version, part of the general header
 461 * @ttl: time to live for this packet, part of the general header
462 * @reserved: reserved byte for alignment
463 * @seqno: sequence identification
464 * @orig: originator of the broadcast packet
465 */
466struct batadv_bcast_packet {
467 u8 packet_type;
468 u8 version; /* batman version field */
469 u8 ttl;
470 u8 reserved;
471 __be32 seqno;
472 u8 orig[ETH_ALEN];
473 /* "4 bytes boundary + 2 bytes" long to make the payload after the
 474 * following ethernet header aligned to a 4-byte boundary again
475 */
476};
477
478/**
479 * struct batadv_coded_packet - network coded packet
480 * @packet_type: batman-adv packet type, part of the general header
 481 * @version: batman-adv protocol version, part of the general header
 482 * @ttl: time to live for this packet, part of the general header
483 * @first_source: original source of first included packet
 484 * @first_orig_dest: original destination of first included packet
485 * @first_crc: checksum of first included packet
486 * @first_ttvn: tt-version number of first included packet
487 * @second_ttl: ttl of second packet
488 * @second_dest: second receiver of this coded packet
489 * @second_source: original source of second included packet
490 * @second_orig_dest: original destination of second included packet
491 * @second_crc: checksum of second included packet
492 * @second_ttvn: tt version number of second included packet
493 * @coded_len: length of network coded part of the payload
494 */
495struct batadv_coded_packet {
496 u8 packet_type;
497 u8 version; /* batman version field */
498 u8 ttl;
499 u8 first_ttvn;
500 /* u8 first_dest[ETH_ALEN]; - saved in mac header destination */
501 u8 first_source[ETH_ALEN];
502 u8 first_orig_dest[ETH_ALEN];
503 __be32 first_crc;
504 u8 second_ttl;
505 u8 second_ttvn;
506 u8 second_dest[ETH_ALEN];
507 u8 second_source[ETH_ALEN];
508 u8 second_orig_dest[ETH_ALEN];
509 __be32 second_crc;
510 __be16 coded_len;
511};
512
513#pragma pack()
514
515/**
516 * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
517 * @packet_type: batman-adv packet type, part of the general header
 518 * @version: batman-adv protocol version, part of the general header
 519 * @ttl: time to live for this packet, part of the general header
520 * @reserved: reserved field (for packet alignment)
521 * @src: address of the source
522 * @dst: address of the destination
523 * @tvlv_len: length of tvlv data following the unicast tvlv header
524 * @align: 2 bytes to align the header to a 4 byte boundary
525 */
526struct batadv_unicast_tvlv_packet {
527 u8 packet_type;
528 u8 version; /* batman version field */
529 u8 ttl;
530 u8 reserved;
531 u8 dst[ETH_ALEN];
532 u8 src[ETH_ALEN];
533 __be16 tvlv_len;
534 u16 align;
535};
536
537/**
538 * struct batadv_tvlv_hdr - base tvlv header struct
539 * @type: tvlv container type (see batadv_tvlv_type)
540 * @version: tvlv container version
541 * @len: tvlv container length
542 */
543struct batadv_tvlv_hdr {
544 u8 type;
545 u8 version;
546 __be16 len;
547};
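
TVLV containers sit back to back in a buffer, each prefixed by this header, with @len counting only the value bytes that follow. A bounds-checked walk, sketched under that assumption (the helper name is invented):

	static void batadv_tvlv_walk_sketch(u8 *buf, u16 buf_len)
	{
		struct batadv_tvlv_hdr *hdr;
		u16 len;

		while (buf_len >= sizeof(*hdr)) {
			hdr = (struct batadv_tvlv_hdr *)buf;
			len = ntohs(hdr->len); /* value length, network order */
			if (buf_len < sizeof(*hdr) + len)
				break; /* truncated container, stop */

			/* dispatch on hdr->type and hdr->version here */

			buf += sizeof(*hdr) + len;
			buf_len -= sizeof(*hdr) + len;
		}
	}
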
548
549/**
550 * struct batadv_tvlv_gateway_data - gateway data propagated through gw tvlv
551 * container
552 * @bandwidth_down: advertised uplink download bandwidth
553 * @bandwidth_up: advertised uplink upload bandwidth
554 */
555struct batadv_tvlv_gateway_data {
556 __be32 bandwidth_down;
557 __be32 bandwidth_up;
558};
559
560/**
561 * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
562 * @flags: translation table flags (see batadv_tt_data_flags)
563 * @ttvn: translation table version number
564 * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
565 * one batadv_tvlv_tt_vlan_data object per announced vlan
566 */
567struct batadv_tvlv_tt_data {
568 u8 flags;
569 u8 ttvn;
570 __be16 num_vlan;
571};
572
573/**
574 * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through
575 * the tt tvlv container
576 * @crc: crc32 checksum of the entries belonging to this vlan
577 * @vid: vlan identifier
578 * @reserved: unused, useful for alignment purposes
579 */
580struct batadv_tvlv_tt_vlan_data {
581 __be32 crc;
582 __be16 vid;
583 u16 reserved;
584};
585
586/**
587 * struct batadv_tvlv_tt_change - translation table diff data
588 * @flags: status indicators concerning the non-mesh client (see
589 * batadv_tt_client_flags)
590 * @reserved: reserved field - useful for alignment purposes only
591 * @addr: mac address of non-mesh client that triggered this tt change
592 * @vid: VLAN identifier
593 */
594struct batadv_tvlv_tt_change {
595 u8 flags;
596 u8 reserved[3];
597 u8 addr[ETH_ALEN];
598 __be16 vid;
599};
600
601/**
602 * struct batadv_tvlv_roam_adv - roaming advertisement
603 * @client: mac address of roaming client
604 * @vid: VLAN identifier
605 */
606struct batadv_tvlv_roam_adv {
607 u8 client[ETH_ALEN];
608 __be16 vid;
609};
610
611/**
612 * struct batadv_tvlv_mcast_data - payload of a multicast tvlv
613 * @flags: multicast flags announced by the orig node
614 * @reserved: reserved field
615 */
616struct batadv_tvlv_mcast_data {
617 u8 flags;
618 u8 reserved[3];
619};
620
621#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 40d9bf3e5bfe..b6891e8b741c 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -33,6 +34,7 @@
33#include <linux/skbuff.h> 34#include <linux/skbuff.h>
34#include <linux/spinlock.h> 35#include <linux/spinlock.h>
35#include <linux/stddef.h> 36#include <linux/stddef.h>
37#include <uapi/linux/batadv_packet.h>
36 38
37#include "bitarray.h" 39#include "bitarray.h"
38#include "bridge_loop_avoidance.h" 40#include "bridge_loop_avoidance.h"
@@ -43,7 +45,6 @@
43#include "log.h" 45#include "log.h"
44#include "network-coding.h" 46#include "network-coding.h"
45#include "originator.h" 47#include "originator.h"
46#include "packet.h"
47#include "send.h" 48#include "send.h"
48#include "soft-interface.h" 49#include "soft-interface.h"
49#include "tp_meter.h" 50#include "tp_meter.h"
@@ -54,7 +55,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
54 struct batadv_hard_iface *recv_if); 55 struct batadv_hard_iface *recv_if);
55 56
56/** 57/**
57 * _batadv_update_route - set the router for this originator 58 * _batadv_update_route() - set the router for this originator
58 * @bat_priv: the bat priv with all the soft interface information 59 * @bat_priv: the bat priv with all the soft interface information
59 * @orig_node: orig node which is to be configured 60 * @orig_node: orig node which is to be configured
60 * @recv_if: the receive interface for which this route is set 61 * @recv_if: the receive interface for which this route is set
@@ -118,7 +119,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
118} 119}
119 120
120/** 121/**
121 * batadv_update_route - set the router for this originator 122 * batadv_update_route() - set the router for this originator
122 * @bat_priv: the bat priv with all the soft interface information 123 * @bat_priv: the bat priv with all the soft interface information
123 * @orig_node: orig node which is to be configured 124 * @orig_node: orig node which is to be configured
124 * @recv_if: the receive interface for which this route is set 125 * @recv_if: the receive interface for which this route is set
@@ -145,7 +146,7 @@ out:
145} 146}
146 147
147/** 148/**
148 * batadv_window_protected - checks whether the host restarted and is in the 149 * batadv_window_protected() - checks whether the host restarted and is in the
 149 * protection time. 150 * protection time.
150 * @bat_priv: the bat priv with all the soft interface information 151 * @bat_priv: the bat priv with all the soft interface information
151 * @seq_num_diff: difference between the current/received sequence number and 152 * @seq_num_diff: difference between the current/received sequence number and
@@ -180,6 +181,14 @@ bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
180 return false; 181 return false;
181} 182}
182 183
184/**
185 * batadv_check_management_packet() - Check preconditions for management packets
186 * @skb: incoming packet buffer
187 * @hard_iface: incoming hard interface
188 * @header_len: minimal header length of packet type
189 *
190 * Return: true when management preconditions are met, false otherwise
191 */
183bool batadv_check_management_packet(struct sk_buff *skb, 192bool batadv_check_management_packet(struct sk_buff *skb,
184 struct batadv_hard_iface *hard_iface, 193 struct batadv_hard_iface *hard_iface,
185 int header_len) 194 int header_len)
@@ -212,7 +221,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
212} 221}
213 222
214/** 223/**
215 * batadv_recv_my_icmp_packet - receive an icmp packet locally 224 * batadv_recv_my_icmp_packet() - receive an icmp packet locally
216 * @bat_priv: the bat priv with all the soft interface information 225 * @bat_priv: the bat priv with all the soft interface information
217 * @skb: icmp packet to process 226 * @skb: icmp packet to process
218 * 227 *
@@ -347,6 +356,13 @@ out:
347 return ret; 356 return ret;
348} 357}
349 358
359/**
360 * batadv_recv_icmp_packet() - Process incoming icmp packet
361 * @skb: incoming packet buffer
362 * @recv_if: incoming hard interface
363 *
364 * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
365 */
350int batadv_recv_icmp_packet(struct sk_buff *skb, 366int batadv_recv_icmp_packet(struct sk_buff *skb,
351 struct batadv_hard_iface *recv_if) 367 struct batadv_hard_iface *recv_if)
352{ 368{
@@ -440,7 +456,7 @@ free_skb:
440} 456}
441 457
442/** 458/**
443 * batadv_check_unicast_packet - Check for malformed unicast packets 459 * batadv_check_unicast_packet() - Check for malformed unicast packets
444 * @bat_priv: the bat priv with all the soft interface information 460 * @bat_priv: the bat priv with all the soft interface information
445 * @skb: packet to check 461 * @skb: packet to check
446 * @hdr_size: size of header to pull 462 * @hdr_size: size of header to pull
@@ -478,7 +494,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
478} 494}
479 495
480/** 496/**
481 * batadv_last_bonding_get - Get last_bonding_candidate of orig_node 497 * batadv_last_bonding_get() - Get last_bonding_candidate of orig_node
482 * @orig_node: originator node whose last bonding candidate should be retrieved 498 * @orig_node: originator node whose last bonding candidate should be retrieved
483 * 499 *
484 * Return: last bonding candidate of router or NULL if not found 500 * Return: last bonding candidate of router or NULL if not found
@@ -501,7 +517,7 @@ batadv_last_bonding_get(struct batadv_orig_node *orig_node)
501} 517}
502 518
503/** 519/**
504 * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node 520 * batadv_last_bonding_replace() - Replace last_bonding_candidate of orig_node
505 * @orig_node: originator node whose bonding candidates should be replaced 521 * @orig_node: originator node whose bonding candidates should be replaced
506 * @new_candidate: new bonding candidate or NULL 522 * @new_candidate: new bonding candidate or NULL
507 */ 523 */
@@ -524,7 +540,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
524} 540}
525 541
526/** 542/**
527 * batadv_find_router - find a suitable router for this originator 543 * batadv_find_router() - find a suitable router for this originator
528 * @bat_priv: the bat priv with all the soft interface information 544 * @bat_priv: the bat priv with all the soft interface information
529 * @orig_node: the destination node 545 * @orig_node: the destination node
530 * @recv_if: pointer to interface this packet was received on 546 * @recv_if: pointer to interface this packet was received on
@@ -741,7 +757,7 @@ free_skb:
741} 757}
742 758
743/** 759/**
744 * batadv_reroute_unicast_packet - update the unicast header for re-routing 760 * batadv_reroute_unicast_packet() - update the unicast header for re-routing
745 * @bat_priv: the bat priv with all the soft interface information 761 * @bat_priv: the bat priv with all the soft interface information
746 * @unicast_packet: the unicast header to be updated 762 * @unicast_packet: the unicast header to be updated
747 * @dst_addr: the payload destination 763 * @dst_addr: the payload destination
@@ -904,7 +920,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
904} 920}
905 921
906/** 922/**
907 * batadv_recv_unhandled_unicast_packet - receive and process packets which 923 * batadv_recv_unhandled_unicast_packet() - receive and process packets which
908 * are in the unicast number space but not yet known to the implementation 924 * are in the unicast number space but not yet known to the implementation
909 * @skb: unicast tvlv packet to process 925 * @skb: unicast tvlv packet to process
910 * @recv_if: pointer to interface this packet was received on 926 * @recv_if: pointer to interface this packet was received on
@@ -935,6 +951,13 @@ free_skb:
935 return NET_RX_DROP; 951 return NET_RX_DROP;
936} 952}
937 953
954/**
955 * batadv_recv_unicast_packet() - Process incoming unicast packet
956 * @skb: incoming packet buffer
957 * @recv_if: incoming hard interface
958 *
959 * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
960 */
938int batadv_recv_unicast_packet(struct sk_buff *skb, 961int batadv_recv_unicast_packet(struct sk_buff *skb,
939 struct batadv_hard_iface *recv_if) 962 struct batadv_hard_iface *recv_if)
940{ 963{
@@ -1036,7 +1059,7 @@ free_skb:
1036} 1059}
1037 1060
1038/** 1061/**
1039 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets 1062 * batadv_recv_unicast_tvlv() - receive and process unicast tvlv packets
1040 * @skb: unicast tvlv packet to process 1063 * @skb: unicast tvlv packet to process
1041 * @recv_if: pointer to interface this packet was received on 1064 * @recv_if: pointer to interface this packet was received on
1042 * 1065 *
@@ -1090,7 +1113,7 @@ free_skb:
1090} 1113}
1091 1114
1092/** 1115/**
1093 * batadv_recv_frag_packet - process received fragment 1116 * batadv_recv_frag_packet() - process received fragment
1094 * @skb: the received fragment 1117 * @skb: the received fragment
1095 * @recv_if: interface that the skb is received on 1118 * @recv_if: interface that the skb is received on
1096 * 1119 *
@@ -1155,6 +1178,13 @@ free_skb:
1155 return ret; 1178 return ret;
1156} 1179}
1157 1180
1181/**
1182 * batadv_recv_bcast_packet() - Process incoming broadcast packet
1183 * @skb: incoming packet buffer
1184 * @recv_if: incoming hard interface
1185 *
1186 * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
1187 */
1158int batadv_recv_bcast_packet(struct sk_buff *skb, 1188int batadv_recv_bcast_packet(struct sk_buff *skb,
1159 struct batadv_hard_iface *recv_if) 1189 struct batadv_hard_iface *recv_if)
1160{ 1190{
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 5ede16c32f15..a1289bc5f115 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 7895323fd2a7..2a5ab6f1076d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -23,7 +24,7 @@
23#include <linux/byteorder/generic.h> 24#include <linux/byteorder/generic.h>
24#include <linux/errno.h> 25#include <linux/errno.h>
25#include <linux/etherdevice.h> 26#include <linux/etherdevice.h>
26#include <linux/fs.h> 27#include <linux/gfp.h>
27#include <linux/if.h> 28#include <linux/if.h>
28#include <linux/if_ether.h> 29#include <linux/if_ether.h>
29#include <linux/jiffies.h> 30#include <linux/jiffies.h>
@@ -54,7 +55,7 @@
54static void batadv_send_outstanding_bcast_packet(struct work_struct *work); 55static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
55 56
56/** 57/**
57 * batadv_send_skb_packet - send an already prepared packet 58 * batadv_send_skb_packet() - send an already prepared packet
58 * @skb: the packet to send 59 * @skb: the packet to send
59 * @hard_iface: the interface to use to send the broadcast packet 60 * @hard_iface: the interface to use to send the broadcast packet
60 * @dst_addr: the payload destination 61 * @dst_addr: the payload destination
@@ -123,12 +124,30 @@ send_skb_err:
123 return NET_XMIT_DROP; 124 return NET_XMIT_DROP;
124} 125}
125 126
127/**
128 * batadv_send_broadcast_skb() - Send broadcast packet via hard interface
129 * @skb: packet to be transmitted (with batadv header and no outer eth header)
130 * @hard_iface: outgoing interface
131 *
132 * Return: A negative errno code is returned on a failure. A success does not
133 * guarantee the frame will be transmitted as it may be dropped due
134 * to congestion or traffic shaping.
135 */
126int batadv_send_broadcast_skb(struct sk_buff *skb, 136int batadv_send_broadcast_skb(struct sk_buff *skb,
127 struct batadv_hard_iface *hard_iface) 137 struct batadv_hard_iface *hard_iface)
128{ 138{
129 return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr); 139 return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr);
130} 140}
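
Per the kerneldoc added above, only a negative return is a hard error; success merely means the frame was handed toward the driver. A caller sketch (assumed context):

	int ret;

	ret = batadv_send_broadcast_skb(skb, hard_iface);
	if (ret < 0)
		return ret; /* hard failure; a success may still be dropped */
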
131 141
142/**
143 * batadv_send_unicast_skb() - Send unicast packet to neighbor
144 * @skb: packet to be transmitted (with batadv header and no outer eth header)
145 * @neigh: neighbor which is used as next hop to destination
146 *
147 * Return: A negative errno code is returned on a failure. A success does not
148 * guarantee the frame will be transmitted as it may be dropped due
149 * to congestion or traffic shaping.
150 */
132int batadv_send_unicast_skb(struct sk_buff *skb, 151int batadv_send_unicast_skb(struct sk_buff *skb,
133 struct batadv_neigh_node *neigh) 152 struct batadv_neigh_node *neigh)
134{ 153{
@@ -153,7 +172,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
153} 172}
154 173
155/** 174/**
156 * batadv_send_skb_to_orig - Lookup next-hop and transmit skb. 175 * batadv_send_skb_to_orig() - Lookup next-hop and transmit skb.
157 * @skb: Packet to be transmitted. 176 * @skb: Packet to be transmitted.
158 * @orig_node: Final destination of the packet. 177 * @orig_node: Final destination of the packet.
159 * @recv_if: Interface used when receiving the packet (can be NULL). 178 * @recv_if: Interface used when receiving the packet (can be NULL).
@@ -216,7 +235,7 @@ free_skb:
216} 235}
217 236
218/** 237/**
219 * batadv_send_skb_push_fill_unicast - extend the buffer and initialize the 238 * batadv_send_skb_push_fill_unicast() - extend the buffer and initialize the
220 * common fields for unicast packets 239 * common fields for unicast packets
221 * @skb: the skb carrying the unicast header to initialize 240 * @skb: the skb carrying the unicast header to initialize
222 * @hdr_size: amount of bytes to push at the beginning of the skb 241 * @hdr_size: amount of bytes to push at the beginning of the skb
@@ -249,7 +268,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
249} 268}
250 269
251/** 270/**
252 * batadv_send_skb_prepare_unicast - encapsulate an skb with a unicast header 271 * batadv_send_skb_prepare_unicast() - encapsulate an skb with a unicast header
253 * @skb: the skb containing the payload to encapsulate 272 * @skb: the skb containing the payload to encapsulate
254 * @orig_node: the destination node 273 * @orig_node: the destination node
255 * 274 *
@@ -264,7 +283,7 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
264} 283}
265 284
266/** 285/**
267 * batadv_send_skb_prepare_unicast_4addr - encapsulate an skb with a 286 * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a
268 * unicast 4addr header 287 * unicast 4addr header
269 * @bat_priv: the bat priv with all the soft interface information 288 * @bat_priv: the bat priv with all the soft interface information
270 * @skb: the skb containing the payload to encapsulate 289 * @skb: the skb containing the payload to encapsulate
@@ -308,7 +327,7 @@ out:
308} 327}
309 328
310/** 329/**
311 * batadv_send_skb_unicast - encapsulate and send an skb via unicast 330 * batadv_send_skb_unicast() - encapsulate and send an skb via unicast
312 * @bat_priv: the bat priv with all the soft interface information 331 * @bat_priv: the bat priv with all the soft interface information
313 * @skb: payload to send 332 * @skb: payload to send
314 * @packet_type: the batman unicast packet type to use 333 * @packet_type: the batman unicast packet type to use
@@ -378,7 +397,7 @@ out:
378} 397}
379 398
380/** 399/**
381 * batadv_send_skb_via_tt_generic - send an skb via TT lookup 400 * batadv_send_skb_via_tt_generic() - send an skb via TT lookup
382 * @bat_priv: the bat priv with all the soft interface information 401 * @bat_priv: the bat priv with all the soft interface information
383 * @skb: payload to send 402 * @skb: payload to send
384 * @packet_type: the batman unicast packet type to use 403 * @packet_type: the batman unicast packet type to use
@@ -425,7 +444,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
425} 444}
426 445
427/** 446/**
428 * batadv_send_skb_via_gw - send an skb via gateway lookup 447 * batadv_send_skb_via_gw() - send an skb via gateway lookup
429 * @bat_priv: the bat priv with all the soft interface information 448 * @bat_priv: the bat priv with all the soft interface information
430 * @skb: payload to send 449 * @skb: payload to send
431 * @vid: the vid to be used to search the translation table 450 * @vid: the vid to be used to search the translation table
@@ -452,7 +471,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
452} 471}
453 472
454/** 473/**
455 * batadv_forw_packet_free - free a forwarding packet 474 * batadv_forw_packet_free() - free a forwarding packet
456 * @forw_packet: The packet to free 475 * @forw_packet: The packet to free
 457 * @dropped: whether the packet is freed because it is dropped 476 * @dropped: whether the packet is freed because it is dropped
458 * 477 *
@@ -477,7 +496,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
477} 496}
478 497
479/** 498/**
480 * batadv_forw_packet_alloc - allocate a forwarding packet 499 * batadv_forw_packet_alloc() - allocate a forwarding packet
481 * @if_incoming: The (optional) if_incoming to be grabbed 500 * @if_incoming: The (optional) if_incoming to be grabbed
482 * @if_outgoing: The (optional) if_outgoing to be grabbed 501 * @if_outgoing: The (optional) if_outgoing to be grabbed
483 * @queue_left: The (optional) queue counter to decrease 502 * @queue_left: The (optional) queue counter to decrease
@@ -543,7 +562,7 @@ err:
543} 562}
544 563
545/** 564/**
546 * batadv_forw_packet_was_stolen - check whether someone stole this packet 565 * batadv_forw_packet_was_stolen() - check whether someone stole this packet
547 * @forw_packet: the forwarding packet to check 566 * @forw_packet: the forwarding packet to check
548 * 567 *
549 * This function checks whether the given forwarding packet was claimed by 568 * This function checks whether the given forwarding packet was claimed by
@@ -558,7 +577,7 @@ batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet)
558} 577}
559 578
560/** 579/**
561 * batadv_forw_packet_steal - claim a forw_packet for free() 580 * batadv_forw_packet_steal() - claim a forw_packet for free()
562 * @forw_packet: the forwarding packet to steal 581 * @forw_packet: the forwarding packet to steal
563 * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock) 582 * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock)
564 * 583 *
@@ -589,7 +608,7 @@ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
589} 608}
590 609
591/** 610/**
592 * batadv_forw_packet_list_steal - claim a list of forward packets for free() 611 * batadv_forw_packet_list_steal() - claim a list of forward packets for free()
 593 * @forw_list: the forward packets to be stolen 612 * @forw_list: the forward packets to be stolen
594 * @cleanup_list: a backup pointer, to be able to dispose the packet later 613 * @cleanup_list: a backup pointer, to be able to dispose the packet later
595 * @hard_iface: the interface to steal forward packets from 614 * @hard_iface: the interface to steal forward packets from
@@ -625,7 +644,7 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
625} 644}
626 645
627/** 646/**
628 * batadv_forw_packet_list_free - free a list of forward packets 647 * batadv_forw_packet_list_free() - free a list of forward packets
629 * @head: a list of to be freed forw_packets 648 * @head: a list of to be freed forw_packets
630 * 649 *
631 * This function cancels the scheduling of any packet in the provided list, 650 * This function cancels the scheduling of any packet in the provided list,
@@ -649,7 +668,7 @@ static void batadv_forw_packet_list_free(struct hlist_head *head)
649} 668}
650 669
651/** 670/**
652 * batadv_forw_packet_queue - try to queue a forwarding packet 671 * batadv_forw_packet_queue() - try to queue a forwarding packet
653 * @forw_packet: the forwarding packet to queue 672 * @forw_packet: the forwarding packet to queue
654 * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock) 673 * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock)
 655 * @head: the shelf to queue it on (e.g. forw_{bat,bcast}_list) 674 * @head: the shelf to queue it on (e.g. forw_{bat,bcast}_list)
@@ -693,7 +712,7 @@ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet,
693} 712}
694 713
695/** 714/**
696 * batadv_forw_packet_bcast_queue - try to queue a broadcast packet 715 * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet
697 * @bat_priv: the bat priv with all the soft interface information 716 * @bat_priv: the bat priv with all the soft interface information
698 * @forw_packet: the forwarding packet to queue 717 * @forw_packet: the forwarding packet to queue
699 * @send_time: timestamp (jiffies) when the packet is to be sent 718 * @send_time: timestamp (jiffies) when the packet is to be sent
@@ -712,7 +731,7 @@ batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv,
712} 731}
713 732
714/** 733/**
715 * batadv_forw_packet_ogmv1_queue - try to queue an OGMv1 packet 734 * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet
716 * @bat_priv: the bat priv with all the soft interface information 735 * @bat_priv: the bat priv with all the soft interface information
717 * @forw_packet: the forwarding packet to queue 736 * @forw_packet: the forwarding packet to queue
718 * @send_time: timestamp (jiffies) when the packet is to be sent 737 * @send_time: timestamp (jiffies) when the packet is to be sent
@@ -730,7 +749,7 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
730} 749}
731 750
732/** 751/**
733 * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends 752 * batadv_add_bcast_packet_to_list() - queue broadcast packet for multiple sends
734 * @bat_priv: the bat priv with all the soft interface information 753 * @bat_priv: the bat priv with all the soft interface information
735 * @skb: broadcast packet to add 754 * @skb: broadcast packet to add
736 * @delay: number of jiffies to wait before sending 755 * @delay: number of jiffies to wait before sending
@@ -790,7 +809,7 @@ err:
790} 809}
791 810
792/** 811/**
793 * batadv_forw_packet_bcasts_left - check if a retransmission is necessary 812 * batadv_forw_packet_bcasts_left() - check if a retransmission is necessary
794 * @forw_packet: the forwarding packet to check 813 * @forw_packet: the forwarding packet to check
795 * @hard_iface: the interface to check on 814 * @hard_iface: the interface to check on
796 * 815 *
@@ -818,7 +837,8 @@ batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
818} 837}
819 838
820/** 839/**
821 * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet 840 * batadv_forw_packet_bcasts_inc() - increment retransmission counter of a
841 * packet
822 * @forw_packet: the packet to increase the counter for 842 * @forw_packet: the packet to increase the counter for
823 */ 843 */
824static void 844static void
@@ -828,7 +848,7 @@ batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
828} 848}
829 849
830/** 850/**
831 * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions 851 * batadv_forw_packet_is_rebroadcast() - check packet for previous transmissions
832 * @forw_packet: the packet to check 852 * @forw_packet: the packet to check
833 * 853 *
834 * Return: True if this packet was transmitted before, false otherwise. 854 * Return: True if this packet was transmitted before, false otherwise.
@@ -953,7 +973,7 @@ out:
953} 973}
954 974
955/** 975/**
956 * batadv_purge_outstanding_packets - stop/purge scheduled bcast/OGMv1 packets 976 * batadv_purge_outstanding_packets() - stop/purge scheduled bcast/OGMv1 packets
957 * @bat_priv: the bat priv with all the soft interface information 977 * @bat_priv: the bat priv with all the soft interface information
958 * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on 978 * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on
959 * 979 *
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index a16b34f473ef..1e8c79093623 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -23,8 +24,7 @@
23#include <linux/compiler.h> 24#include <linux/compiler.h>
24#include <linux/spinlock.h> 25#include <linux/spinlock.h>
25#include <linux/types.h> 26#include <linux/types.h>
26 27#include <uapi/linux/batadv_packet.h>
27#include "packet.h"
28 28
29struct sk_buff; 29struct sk_buff;
30 30
@@ -76,7 +76,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
76 unsigned short vid); 76 unsigned short vid);
77 77
78/** 78/**
79 * batadv_send_skb_via_tt - send an skb via TT lookup 79 * batadv_send_skb_via_tt() - send an skb via TT lookup
80 * @bat_priv: the bat priv with all the soft interface information 80 * @bat_priv: the bat priv with all the soft interface information
81 * @skb: the payload to send 81 * @skb: the payload to send
82 * @dst_hint: can be used to override the destination contained in the skb 82 * @dst_hint: can be used to override the destination contained in the skb
@@ -97,7 +97,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
97} 97}
98 98
99/** 99/**
100 * batadv_send_skb_via_tt_4addr - send an skb via TT lookup 100 * batadv_send_skb_via_tt_4addr() - send an skb via TT lookup
101 * @bat_priv: the bat priv with all the soft interface information 101 * @bat_priv: the bat priv with all the soft interface information
102 * @skb: the payload to send 102 * @skb: the payload to send
103 * @packet_subtype: the unicast 4addr packet subtype to use 103 * @packet_subtype: the unicast 4addr packet subtype to use
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 9f673cdfecf8..900c5ce21cd4 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
@@ -26,7 +27,7 @@
26#include <linux/errno.h> 27#include <linux/errno.h>
27#include <linux/etherdevice.h> 28#include <linux/etherdevice.h>
28#include <linux/ethtool.h> 29#include <linux/ethtool.h>
29#include <linux/fs.h> 30#include <linux/gfp.h>
30#include <linux/if_ether.h> 31#include <linux/if_ether.h>
31#include <linux/if_vlan.h> 32#include <linux/if_vlan.h>
32#include <linux/jiffies.h> 33#include <linux/jiffies.h>
@@ -48,6 +49,7 @@
48#include <linux/stddef.h> 49#include <linux/stddef.h>
49#include <linux/string.h> 50#include <linux/string.h>
50#include <linux/types.h> 51#include <linux/types.h>
52#include <uapi/linux/batadv_packet.h>
51 53
52#include "bat_algo.h" 54#include "bat_algo.h"
53#include "bridge_loop_avoidance.h" 55#include "bridge_loop_avoidance.h"
@@ -59,11 +61,17 @@
59#include "multicast.h" 61#include "multicast.h"
60#include "network-coding.h" 62#include "network-coding.h"
61#include "originator.h" 63#include "originator.h"
62#include "packet.h"
63#include "send.h" 64#include "send.h"
64#include "sysfs.h" 65#include "sysfs.h"
65#include "translation-table.h" 66#include "translation-table.h"
66 67
68/**
69 * batadv_skb_head_push() - Increase header size and move (push) head pointer
70 * @skb: packet buffer which should be modified
71 * @len: number of bytes to add
72 *
73 * Return: 0 on success or negative error number in case of failure
74 */
67int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) 75int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
68{ 76{
69 int result; 77 int result;
@@ -96,7 +104,7 @@ static int batadv_interface_release(struct net_device *dev)
96} 104}
97 105
98/** 106/**
99 * batadv_sum_counter - Sum the cpu-local counters for index 'idx' 107 * batadv_sum_counter() - Sum the cpu-local counters for index 'idx'
100 * @bat_priv: the bat priv with all the soft interface information 108 * @bat_priv: the bat priv with all the soft interface information
101 * @idx: index of counter to sum up 109 * @idx: index of counter to sum up
102 * 110 *
@@ -169,7 +177,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
169} 177}
170 178
171/** 179/**
172 * batadv_interface_set_rx_mode - set the rx mode of a device 180 * batadv_interface_set_rx_mode() - set the rx mode of a device
173 * @dev: registered network device to modify 181 * @dev: registered network device to modify
174 * 182 *
175 * We do not actually need to set any rx filters for the virtual batman 183 * We do not actually need to set any rx filters for the virtual batman
@@ -389,7 +397,7 @@ end:
389} 397}
390 398
391/** 399/**
392 * batadv_interface_rx - receive ethernet frame on local batman-adv interface 400 * batadv_interface_rx() - receive ethernet frame on local batman-adv interface
393 * @soft_iface: local interface which will receive the ethernet frame 401 * @soft_iface: local interface which will receive the ethernet frame
394 * @skb: ethernet frame for @soft_iface 402 * @skb: ethernet frame for @soft_iface
395 * @hdr_size: size of already parsed batman-adv header 403 * @hdr_size: size of already parsed batman-adv header
@@ -501,8 +509,8 @@ out:
501} 509}
502 510
503/** 511/**
504 * batadv_softif_vlan_release - release vlan from lists and queue for free after 512 * batadv_softif_vlan_release() - release vlan from lists and queue for free
505 * rcu grace period 513 * after rcu grace period
506 * @ref: kref pointer of the vlan object 514 * @ref: kref pointer of the vlan object
507 */ 515 */
508static void batadv_softif_vlan_release(struct kref *ref) 516static void batadv_softif_vlan_release(struct kref *ref)
@@ -519,7 +527,7 @@ static void batadv_softif_vlan_release(struct kref *ref)
519} 527}
520 528
521/** 529/**
522 * batadv_softif_vlan_put - decrease the vlan object refcounter and 530 * batadv_softif_vlan_put() - decrease the vlan object refcounter and
523 * possibly release it 531 * possibly release it
524 * @vlan: the vlan object to release 532 * @vlan: the vlan object to release
525 */ 533 */
@@ -532,7 +540,7 @@ void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
532} 540}
533 541
534/** 542/**
535 * batadv_softif_vlan_get - get the vlan object for a specific vid 543 * batadv_softif_vlan_get() - get the vlan object for a specific vid
536 * @bat_priv: the bat priv with all the soft interface information 544 * @bat_priv: the bat priv with all the soft interface information
537 * @vid: the identifier of the vlan object to retrieve 545 * @vid: the identifier of the vlan object to retrieve
538 * 546 *
@@ -561,7 +569,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
561} 569}
562 570
563/** 571/**
564 * batadv_softif_create_vlan - allocate the needed resources for a new vlan 572 * batadv_softif_create_vlan() - allocate the needed resources for a new vlan
565 * @bat_priv: the bat priv with all the soft interface information 573 * @bat_priv: the bat priv with all the soft interface information
566 * @vid: the VLAN identifier 574 * @vid: the VLAN identifier
567 * 575 *
@@ -613,7 +621,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
613} 621}
614 622
615/** 623/**
616 * batadv_softif_destroy_vlan - remove and destroy a softif_vlan object 624 * batadv_softif_destroy_vlan() - remove and destroy a softif_vlan object
617 * @bat_priv: the bat priv with all the soft interface information 625 * @bat_priv: the bat priv with all the soft interface information
618 * @vlan: the object to remove 626 * @vlan: the object to remove
619 */ 627 */
@@ -631,7 +639,7 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
631} 639}
632 640
633/** 641/**
634 * batadv_interface_add_vid - ndo_add_vid API implementation 642 * batadv_interface_add_vid() - ndo_add_vid API implementation
635 * @dev: the netdev of the mesh interface 643 * @dev: the netdev of the mesh interface
 636 * @proto: protocol of the vlan id 644 * @proto: protocol of the vlan id
637 * @vid: identifier of the new vlan 645 * @vid: identifier of the new vlan
@@ -689,7 +697,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
689} 697}
690 698
691/** 699/**
692 * batadv_interface_kill_vid - ndo_kill_vid API implementation 700 * batadv_interface_kill_vid() - ndo_kill_vid API implementation
693 * @dev: the netdev of the mesh interface 701 * @dev: the netdev of the mesh interface
 694 * @proto: protocol of the vlan id 702 * @proto: protocol of the vlan id
695 * @vid: identifier of the deleted vlan 703 * @vid: identifier of the deleted vlan
@@ -732,7 +740,7 @@ static struct lock_class_key batadv_netdev_xmit_lock_key;
 static struct lock_class_key batadv_netdev_addr_lock_key;
 
 /**
- * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue
+ * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
  * @dev: device which owns the tx queue
  * @txq: tx queue to modify
  * @_unused: always NULL
@@ -745,7 +753,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev,
 }
 
 /**
- * batadv_set_lockdep_class - Set txq and addr_list lockdep class
+ * batadv_set_lockdep_class() - Set txq and addr_list lockdep class
  * @dev: network device to modify
  */
 static void batadv_set_lockdep_class(struct net_device *dev)
@@ -755,7 +763,7 @@ static void batadv_set_lockdep_class(struct net_device *dev)
 }
 
 /**
- * batadv_softif_init_late - late stage initialization of soft interface
+ * batadv_softif_init_late() - late stage initialization of soft interface
  * @dev: registered network device to modify
  *
  * Return: error code on failures
@@ -860,7 +868,7 @@ free_bat_counters:
 }
 
 /**
- * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
+ * batadv_softif_slave_add() - Add a slave interface to a batadv_soft_interface
  * @dev: batadv_soft_interface used as master interface
  * @slave_dev: net_device which should become the slave interface
  * @extack: extended ACK report struct
@@ -888,7 +896,7 @@ out:
 }
 
 /**
- * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface
+ * batadv_softif_slave_del() - Delete a slave iface from a batadv_soft_interface
  * @dev: batadv_soft_interface used as master interface
  * @slave_dev: net_device which should be removed from the master interface
  *
@@ -1023,7 +1031,7 @@ static const struct ethtool_ops batadv_ethtool_ops = {
 };
 
 /**
- * batadv_softif_free - Deconstructor of batadv_soft_interface
+ * batadv_softif_free() - Deconstructor of batadv_soft_interface
  * @dev: Device to cleanup and remove
  */
 static void batadv_softif_free(struct net_device *dev)
@@ -1039,7 +1047,7 @@ static void batadv_softif_free(struct net_device *dev)
 }
 
 /**
- * batadv_softif_init_early - early stage initialization of soft interface
+ * batadv_softif_init_early() - early stage initialization of soft interface
  * @dev: registered network device to modify
  */
 static void batadv_softif_init_early(struct net_device *dev)
@@ -1063,6 +1071,13 @@ static void batadv_softif_init_early(struct net_device *dev)
 	dev->ethtool_ops = &batadv_ethtool_ops;
 }
 
+/**
+ * batadv_softif_create() - Create and register soft interface
+ * @net: the applicable net namespace
+ * @name: name of the new soft interface
+ *
+ * Return: newly allocated soft_interface, NULL on errors
+ */
 struct net_device *batadv_softif_create(struct net *net, const char *name)
 {
 	struct net_device *soft_iface;
@@ -1089,7 +1104,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
 }
 
 /**
- * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs
+ * batadv_softif_destroy_sysfs() - deletion of batadv_soft_interface via sysfs
  * @soft_iface: the to-be-removed batman-adv interface
  */
 void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
@@ -1111,7 +1126,8 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 }
 
 /**
- * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink
+ * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via
+ * netlink
  * @soft_iface: the to-be-removed batman-adv interface
  * @head: list pointer
  */
@@ -1139,6 +1155,12 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
 	unregister_netdevice_queue(soft_iface, head);
 }
 
+/**
+ * batadv_softif_is_valid() - Check whether device is a batadv soft interface
+ * @net_dev: device which should be checked
+ *
+ * Return: true when net_dev is a batman-adv interface, false otherwise
+ */
 bool batadv_softif_is_valid(const struct net_device *net_dev)
 {
 	if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
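The hunk above also documents the check batman-adv uses to recognize its own devices: a net_device belongs to batman-adv exactly when its ndo_start_xmit handler is batadv_interface_tx. A minimal user-space sketch of that ops-pointer comparison idiom (the struct and stub definitions below are simplified stand-ins, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct net_device;

/* Simplified stand-ins for the kernel's net_device_ops/net_device. */
struct net_device_ops {
	int (*ndo_start_xmit)(void *skb, struct net_device *dev);
};

struct net_device {
	const struct net_device_ops *netdev_ops;
};

static int batadv_interface_tx(void *skb, struct net_device *dev)
{
	(void)skb; (void)dev;
	return 0; /* transmit path stub */
}

static const struct net_device_ops batadv_netdev_ops = {
	.ndo_start_xmit = batadv_interface_tx,
};

/* Same idea as batadv_softif_is_valid(): identify the device by
 * comparing the ops callback pointer instead of a name or a flag. */
static bool softif_is_valid(const struct net_device *net_dev)
{
	return net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx;
}

int main(void)
{
	struct net_device dev = { .netdev_ops = &batadv_netdev_ops };

	printf("is batadv soft interface: %d\n", softif_is_valid(&dev));
	return 0;
}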
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 639c3abb214a..075c5b5b2ce1 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Marek Lindner 4 * Marek Lindner
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index aa187fd42475..c1578fa0b952 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
@@ -22,10 +23,11 @@
 #include <linux/compiler.h>
 #include <linux/device.h>
 #include <linux/errno.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
 #include <linux/kernel.h>
+#include <linux/kobject.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
@@ -37,6 +39,7 @@
 #include <linux/string.h>
 #include <linux/stringify.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -45,7 +48,6 @@
 #include "hard-interface.h"
 #include "log.h"
 #include "network-coding.h"
-#include "packet.h"
 #include "soft-interface.h"
 
 static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
@@ -63,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
 }
 
 /**
- * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv
+ * batadv_vlan_kobj_to_batpriv() - convert a vlan kobj in the associated batpriv
  * @obj: kobject to covert
  *
  * Return: the associated batadv_priv struct.
@@ -83,7 +85,7 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
 }
 
 /**
- * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct
+ * batadv_kobj_to_vlan() - convert a kobj in the associated softif_vlan struct
  * @bat_priv: the bat priv with all the soft interface information
  * @obj: kobject to covert
  *
@@ -598,7 +600,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
 }
 
 /**
- * batadv_show_isolation_mark - print the current isolation mark/mask
+ * batadv_show_isolation_mark() - print the current isolation mark/mask
  * @kobj: kobject representing the private mesh sysfs directory
  * @attr: the batman-adv attribute the user is interacting with
  * @buff: the buffer that will contain the data to send back to the user
@@ -616,8 +618,8 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
 }
 
 /**
- * batadv_store_isolation_mark - parse and store the isolation mark/mask entered
- * by the user
+ * batadv_store_isolation_mark() - parse and store the isolation mark/mask
+ * entered by the user
  * @kobj: kobject representing the private mesh sysfs directory
 * @attr: the batman-adv attribute the user is interacting with
  * @buff: the buffer containing the user data
@@ -733,6 +735,12 @@ static struct batadv_attribute *batadv_vlan_attrs[] = {
 	NULL,
 };
 
+/**
+ * batadv_sysfs_add_meshif() - Add soft interface specific sysfs entries
+ * @dev: netdev struct of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_sysfs_add_meshif(struct net_device *dev)
 {
 	struct kobject *batif_kobject = &dev->dev.kobj;
@@ -773,6 +781,10 @@ out:
 	return -ENOMEM;
 }
 
+/**
+ * batadv_sysfs_del_meshif() - Remove soft interface specific sysfs entries
+ * @dev: netdev struct of the soft interface
+ */
void batadv_sysfs_del_meshif(struct net_device *dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -788,7 +800,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
 }
 
 /**
- * batadv_sysfs_add_vlan - add all the needed sysfs objects for the new vlan
+ * batadv_sysfs_add_vlan() - add all the needed sysfs objects for the new vlan
  * @dev: netdev of the mesh interface
  * @vlan: private data of the newly added VLAN interface
  *
@@ -849,7 +861,7 @@ out:
 }
 
 /**
- * batadv_sysfs_del_vlan - remove all the sysfs objects for a given VLAN
+ * batadv_sysfs_del_vlan() - remove all the sysfs objects for a given VLAN
  * @bat_priv: the bat priv with all the soft interface information
  * @vlan: the private data of the VLAN to destroy
  */
@@ -894,7 +906,7 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
 }
 
 /**
- * batadv_store_mesh_iface_finish - store new hardif mesh_iface state
+ * batadv_store_mesh_iface_finish() - store new hardif mesh_iface state
  * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
 * @ifname: name of soft-interface to modify
 *
@@ -947,7 +959,7 @@ out:
 }
 
 /**
- * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * batadv_store_mesh_iface_work() - store new hardif mesh_iface state
  * @work: work queue item
  *
  * Changes the parts of the hard+soft interface which can not be modified under
@@ -1043,7 +1055,7 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj,
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
 
 /**
- * batadv_store_throughput_override - parse and store throughput override
+ * batadv_store_throughput_override() - parse and store throughput override
  * entered by the user
  * @kobj: kobject representing the private mesh sysfs directory
  * @attr: the batman-adv attribute the user is interacting with
@@ -1130,6 +1142,13 @@ static struct batadv_attribute *batadv_batman_attrs[] = {
 	NULL,
 };
 
+/**
+ * batadv_sysfs_add_hardif() - Add hard interface specific sysfs entries
+ * @hardif_obj: address where to store the pointer to new sysfs folder
+ * @dev: netdev struct of the hard interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
 int batadv_sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev)
 {
 	struct kobject *hardif_kobject = &dev->dev.kobj;
@@ -1164,6 +1183,11 @@ out:
 	return -ENOMEM;
 }
 
+/**
+ * batadv_sysfs_del_hardif() - Remove hard interface specific sysfs entries
+ * @hardif_obj: address to the pointer to which stores batman-adv sysfs folder
+ * of the hard interface
+ */
void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
 {
 	kobject_uevent(*hardif_obj, KOBJ_REMOVE);
@@ -1172,6 +1196,16 @@ void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
 	*hardif_obj = NULL;
 }
 
+/**
+ * batadv_throw_uevent() - Send an uevent with batman-adv specific env data
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: subsystem type of event. Stored in uevent's BATTYPE
+ * @action: action type of event. Stored in uevent's BATACTION
+ * @data: string with additional information to the event (ignored for
+ * BATADV_UEV_DEL). Stored in uevent's BATDATA
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
			enum batadv_uev_action action, const char *data)
 {
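The batadv_throw_uevent() kernel-doc added above names three environment variables (BATTYPE, BATACTION, BATDATA) that end up in the uevent. A standalone sketch of how such an env block could be assembled; the enum values, string tables, and helper name here are illustrative only (in the kernel the finished array is handed to kobject_uevent_env()):

#include <stdio.h>
#include <string.h>

enum uev_type { UEV_GW = 0 };
enum uev_action { UEV_ADD = 0, UEV_DEL, UEV_CHANGE };

static const char *type_str[] = { "gw" };
static const char *action_str[] = { "add", "del", "change" };

/* Build the NULL-terminated BATTYPE/BATACTION/BATDATA env strings
 * described by the kernel-doc above. */
static void throw_uevent(enum uev_type type, enum uev_action action,
			 const char *data, char *env[4], char buf[3][64])
{
	int n = 0;

	snprintf(buf[n], 64, "BATTYPE=%s", type_str[type]);
	env[n] = buf[n]; n++;
	snprintf(buf[n], 64, "BATACTION=%s", action_str[action]);
	env[n] = buf[n]; n++;

	/* BATDATA is omitted for delete events, as documented. */
	if (action != UEV_DEL && data) {
		snprintf(buf[n], 64, "BATDATA=%s", data);
		env[n] = buf[n]; n++;
	}
	env[n] = NULL;
}

int main(void)
{
	char bufs[3][64];
	char *env[4];
	int i;

	throw_uevent(UEV_GW, UEV_ADD, "02:12:34:56:78:9a", env, bufs);
	for (i = 0; env[i]; i++)
		puts(env[i]);
	return 0;
}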
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index e487412e256b..bbeee61221fa 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
@@ -35,10 +36,23 @@ struct net_device;
  */
 #define BATADV_SYSFS_VLAN_SUBDIR_PREFIX "vlan"
 
+/**
+ * struct batadv_attribute - sysfs export helper for batman-adv attributes
+ */
 struct batadv_attribute {
+	/** @attr: sysfs attribute file */
 	struct attribute attr;
+
+	/**
+	 * @show: function to export the current attribute's content to sysfs
+	 */
 	ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
			char *buf);
+
+	/**
+	 * @store: function to load new value from character buffer and save it
+	 * in batman-adv attribute
+	 */
 	ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
			 char *buf, size_t count);
 };
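The struct documented above follows the usual sysfs wrapper pattern: a plain struct attribute embedded in a type that adds typed show/store callbacks, recovered later via container_of(). A small self-contained sketch of that dispatch, stripped of kernel headers; batadv_attr_show and the sample attribute below are illustrative helpers, not the module's code:

#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

struct attribute {
	const char *name;
};

struct kobject { int unused; };

/* Mirror of the documented batadv_attribute layout. */
struct batadv_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
			char *buf);
	ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
			 char *buf, size_t count);
};

static ssize_t show_aggregated_ogms(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	(void)kobj; (void)attr;
	strcpy(buf, "enabled\n");
	return (ssize_t)strlen(buf);
}

static struct batadv_attribute attr_aggregated_ogms = {
	.attr = { .name = "aggregated_ogms" },
	.show = show_aggregated_ogms,
};

/* sysfs hands back only the embedded struct attribute; container_of()
 * recovers the wrapper so the typed callback can run. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static ssize_t batadv_attr_show(struct kobject *kobj, struct attribute *attr,
				char *buf)
{
	struct batadv_attribute *bat_attr =
		container_of(attr, struct batadv_attribute, attr);

	return bat_attr->show ? bat_attr->show(kobj, attr, buf) : -1;
}

int main(void)
{
	struct kobject kobj = { 0 };
	char buf[64];

	if (batadv_attr_show(&kobj, &attr_aggregated_ogms.attr, buf) > 0)
		printf("%s: %s", attr_aggregated_ogms.attr.name, buf);
	return 0;
}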
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index ebc4e2241c77..8b576712d0c1 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
@@ -19,13 +20,13 @@
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/bug.h>
+#include <linux/build_bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
@@ -48,13 +49,13 @@
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "log.h"
 #include "netlink.h"
 #include "originator.h"
-#include "packet.h"
 #include "send.h"
 
 /**
@@ -97,7 +98,7 @@
 static u8 batadv_tp_prerandom[4096] __read_mostly;
 
 /**
- * batadv_tp_session_cookie - generate session cookie based on session ids
+ * batadv_tp_session_cookie() - generate session cookie based on session ids
  * @session: TP session identifier
  * @icmp_uid: icmp pseudo uid of the tp session
  *
@@ -115,7 +116,7 @@ static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
 }
 
 /**
- * batadv_tp_cwnd - compute the new cwnd size
+ * batadv_tp_cwnd() - compute the new cwnd size
  * @base: base cwnd size value
  * @increment: the value to add to base to get the new size
  * @min: minumim cwnd value (usually MSS)
@@ -140,7 +141,7 @@ static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min)
 }
 
 /**
- * batadv_tp_updated_cwnd - update the Congestion Windows
+ * batadv_tp_updated_cwnd() - update the Congestion Windows
  * @tp_vars: the private data of the current TP meter session
  * @mss: maximum segment size of transmission
  *
@@ -176,7 +177,7 @@ static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss)
 }
 
 /**
- * batadv_tp_update_rto - calculate new retransmission timeout
+ * batadv_tp_update_rto() - calculate new retransmission timeout
  * @tp_vars: the private data of the current TP meter session
  * @new_rtt: new roundtrip time in msec
  */
@@ -212,7 +213,7 @@ static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars,
 }
 
 /**
- * batadv_tp_batctl_notify - send client status result to client
+ * batadv_tp_batctl_notify() - send client status result to client
  * @reason: reason for tp meter session stop
  * @dst: destination of tp_meter session
  * @bat_priv: the bat priv with all the soft interface information
@@ -244,7 +245,7 @@ static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason,
 }
 
 /**
- * batadv_tp_batctl_error_notify - send client error result to client
+ * batadv_tp_batctl_error_notify() - send client error result to client
  * @reason: reason for tp meter session stop
  * @dst: destination of tp_meter session
  * @bat_priv: the bat priv with all the soft interface information
@@ -259,7 +260,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
 }
 
 /**
- * batadv_tp_list_find - find a tp_vars object in the global list
+ * batadv_tp_list_find() - find a tp_vars object in the global list
  * @bat_priv: the bat priv with all the soft interface information
  * @dst: the other endpoint MAC address to look for
  *
@@ -294,7 +295,8 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tp_list_find_session - find tp_vars session object in the global list
+ * batadv_tp_list_find_session() - find tp_vars session object in the global
+ * list
  * @bat_priv: the bat priv with all the soft interface information
  * @dst: the other endpoint MAC address to look for
  * @session: session identifier
@@ -335,7 +337,7 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
 }
 
 /**
- * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for
+ * batadv_tp_vars_release() - release batadv_tp_vars from lists and queue for
  * free after rcu grace period
  * @ref: kref pointer of the batadv_tp_vars
  */
@@ -360,7 +362,7 @@ static void batadv_tp_vars_release(struct kref *ref)
 }
 
 /**
- * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly
+ * batadv_tp_vars_put() - decrement the batadv_tp_vars refcounter and possibly
  * release it
  * @tp_vars: the private data of the current TP meter session to be free'd
  */
@@ -370,7 +372,7 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
 }
 
 /**
- * batadv_tp_sender_cleanup - cleanup sender data and drop and timer
+ * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer
  * @bat_priv: the bat priv with all the soft interface information
  * @tp_vars: the private data of the current TP meter session to cleanup
  */
@@ -400,7 +402,7 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tp_sender_end - print info about ended session and inform client
+ * batadv_tp_sender_end() - print info about ended session and inform client
  * @bat_priv: the bat priv with all the soft interface information
  * @tp_vars: the private data of the current TP meter session
  */
@@ -433,7 +435,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully
+ * batadv_tp_sender_shutdown() - let sender thread/timer stop gracefully
  * @tp_vars: the private data of the current TP meter session
  * @reason: reason for tp meter session stop
  */
@@ -447,7 +449,7 @@ static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
 }
 
 /**
- * batadv_tp_sender_finish - stop sender session after test_length was reached
+ * batadv_tp_sender_finish() - stop sender session after test_length was reached
  * @work: delayed work reference of the related tp_vars
  */
 static void batadv_tp_sender_finish(struct work_struct *work)
@@ -463,7 +465,7 @@ static void batadv_tp_sender_finish(struct work_struct *work)
 }
 
 /**
- * batadv_tp_reset_sender_timer - reschedule the sender timer
+ * batadv_tp_reset_sender_timer() - reschedule the sender timer
  * @tp_vars: the private TP meter data for this session
  *
  * Reschedule the timer using tp_vars->rto as delay
@@ -481,7 +483,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
 }
 
 /**
- * batadv_tp_sender_timeout - timer that fires in case of packet loss
+ * batadv_tp_sender_timeout() - timer that fires in case of packet loss
  * @t: address to timer_list inside tp_vars
  *
  * If fired it means that there was packet loss.
@@ -531,7 +533,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t)
 }
 
 /**
- * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes
+ * batadv_tp_fill_prerandom() - Fill buffer with prefetched random bytes
  * @tp_vars: the private TP meter data for this session
  * @buf: Buffer to fill with bytes
  * @nbytes: amount of pseudorandom bytes
@@ -563,7 +565,7 @@ static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars,
 }
 
 /**
- * batadv_tp_send_msg - send a single message
+ * batadv_tp_send_msg() - send a single message
  * @tp_vars: the private TP meter data for this session
  * @src: source mac address
  * @orig_node: the originator of the destination
@@ -623,7 +625,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
 }
 
 /**
- * batadv_tp_recv_ack - ACK receiving function
+ * batadv_tp_recv_ack() - ACK receiving function
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the buffer containing the received packet
  *
@@ -765,7 +767,7 @@ out:
 }
 
 /**
- * batadv_tp_avail - check if congestion window is not full
+ * batadv_tp_avail() - check if congestion window is not full
  * @tp_vars: the private data of the current TP meter session
  * @payload_len: size of the payload of a single message
  *
@@ -783,7 +785,7 @@ static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars,
 }
 
 /**
- * batadv_tp_wait_available - wait until congestion window becomes free or
+ * batadv_tp_wait_available() - wait until congestion window becomes free or
  * timeout is reached
  * @tp_vars: the private data of the current TP meter session
  * @plen: size of the payload of a single message
@@ -805,7 +807,7 @@ static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen)
 }
 
 /**
- * batadv_tp_send - main sending thread of a tp meter session
+ * batadv_tp_send() - main sending thread of a tp meter session
  * @arg: address of the related tp_vars
  *
  * Return: nothing, this function never returns
@@ -904,7 +906,8 @@ out:
 }
 
 /**
- * batadv_tp_start_kthread - start new thread which manages the tp meter sender
+ * batadv_tp_start_kthread() - start new thread which manages the tp meter
+ * sender
  * @tp_vars: the private data of the current TP meter session
  */
 static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
@@ -935,7 +938,7 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
 }
 
 /**
- * batadv_tp_start - start a new tp meter session
+ * batadv_tp_start() - start a new tp meter session
  * @bat_priv: the bat priv with all the soft interface information
  * @dst: the receiver MAC address
  * @test_length: test length in milliseconds
@@ -1060,7 +1063,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
 }
 
 /**
- * batadv_tp_stop - stop currently running tp meter session
+ * batadv_tp_stop() - stop currently running tp meter session
  * @bat_priv: the bat priv with all the soft interface information
  * @dst: the receiver MAC address
  * @return_value: reason for tp meter session stop
@@ -1092,7 +1095,7 @@ out:
 }
 
 /**
- * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer
+ * batadv_tp_reset_receiver_timer() - reset the receiver shutdown timer
  * @tp_vars: the private data of the current TP meter session
  *
  * start the receiver shutdown timer or reset it if already started
@@ -1104,7 +1107,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
 }
 
 /**
- * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
+ * batadv_tp_receiver_shutdown() - stop a tp meter receiver when timeout is
  * reached without received ack
  * @t: address to timer_list inside tp_vars
  */
@@ -1149,7 +1152,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t)
 }
 
 /**
- * batadv_tp_send_ack - send an ACK packet
+ * batadv_tp_send_ack() - send an ACK packet
  * @bat_priv: the bat priv with all the soft interface information
  * @dst: the mac address of the destination originator
  * @seq: the sequence number to ACK
@@ -1221,7 +1224,7 @@ out:
 }
 
 /**
- * batadv_tp_handle_out_of_order - store an out of order packet
+ * batadv_tp_handle_out_of_order() - store an out of order packet
  * @tp_vars: the private data of the current TP meter session
  * @skb: the buffer containing the received packet
  *
@@ -1297,7 +1300,7 @@ out:
 }
 
 /**
- * batadv_tp_ack_unordered - update number received bytes in current stream
+ * batadv_tp_ack_unordered() - update number received bytes in current stream
  * without gaps
  * @tp_vars: the private data of the current TP meter session
  */
@@ -1330,7 +1333,7 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars)
 }
 
 /**
- * batadv_tp_init_recv - return matching or create new receiver tp_vars
+ * batadv_tp_init_recv() - return matching or create new receiver tp_vars
  * @bat_priv: the bat priv with all the soft interface information
  * @icmp: received icmp tp msg
  *
@@ -1383,7 +1386,7 @@ out_unlock:
 }
 
 /**
- * batadv_tp_recv_msg - process a single data message
+ * batadv_tp_recv_msg() - process a single data message
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the buffer containing the received packet
  *
@@ -1468,7 +1471,7 @@ out:
 }
 
 /**
- * batadv_tp_meter_recv - main TP Meter receiving function
+ * batadv_tp_meter_recv() - main TP Meter receiving function
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the buffer containing the received packet
  */
@@ -1494,7 +1497,7 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
 }
 
 /**
- * batadv_tp_meter_init - initialize global tp_meter structures
+ * batadv_tp_meter_init() - initialize global tp_meter structures
  */
 void __init batadv_tp_meter_init(void)
 {
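The tp_meter hunks above reference two small pieces of TCP-style arithmetic: batadv_tp_cwnd(), which grows a congestion window by an increment while never returning less than a minimum (usually one MSS), and batadv_tp_update_rto(), which folds a new round-trip sample into the retransmission timeout. A standalone sketch of that arithmetic, assuming the classic smoothed-RTT scheme; the exact constants and bounds checking in the kernel implementation may differ:

#include <stdint.h>
#include <stdio.h>

/* Grow a congestion window, saturating on u32 overflow and never
 * dropping below min (usually one MSS). */
static uint32_t tp_cwnd(uint32_t base, uint32_t increment, uint32_t min)
{
	uint32_t new_size = base + increment;

	if (new_size < base)		/* u32 wrap-around */
		new_size = UINT32_MAX;

	return new_size < min ? min : new_size;
}

/* Jacobson/Karels-style RTO update: keep a smoothed RTT and a mean
 * deviation, then set RTO = srtt + 4 * rttvar. */
struct tp_rto_state {
	uint32_t srtt;		/* smoothed round-trip time (ms) */
	uint32_t rttvar;	/* mean deviation (ms) */
	uint32_t rto;		/* retransmission timeout (ms) */
};

static void tp_update_rto(struct tp_rto_state *s, uint32_t new_rtt)
{
	if (s->srtt == 0) {	/* first sample */
		s->srtt = new_rtt;
		s->rttvar = new_rtt / 2;
	} else {
		uint32_t err = new_rtt > s->srtt ? new_rtt - s->srtt
						 : s->srtt - new_rtt;

		/* srtt := 7/8 srtt + 1/8 sample; rttvar := 3/4 rttvar + 1/4 |err| */
		s->srtt = (7 * s->srtt + new_rtt) / 8;
		s->rttvar = (3 * s->rttvar + err) / 4;
	}
	s->rto = s->srtt + 4 * s->rttvar;
}

int main(void)
{
	struct tp_rto_state s = { 0, 0, 0 };
	uint32_t samples[] = { 20, 24, 19, 80, 22 };
	uint32_t cwnd = 1460;
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		tp_update_rto(&s, samples[i]);
		cwnd = tp_cwnd(cwnd, 1460, 1460);
		printf("rtt=%u -> rto=%u cwnd=%u\n", samples[i], s.rto, cwnd);
	}
	return 0;
}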
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index a8ada5c123bd..c8b8f2cb2c2b 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: 2/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
2 * 3 *
3 * Edo Monticelli, Antonio Quartulli 4 * Edo Monticelli, Antonio Quartulli
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 8a3ce79b1307..7550a9ccd695 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
@@ -20,14 +21,14 @@
 
 #include <linux/atomic.h>
 #include <linux/bitops.h>
-#include <linux/bug.h>
+#include <linux/build_bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
 #include <linux/compiler.h>
 #include <linux/crc32c.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jhash.h>
@@ -36,6 +37,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
+#include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
@@ -50,6 +52,7 @@
 #include <net/genetlink.h>
 #include <net/netlink.h>
 #include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
@@ -58,7 +61,6 @@
 #include "log.h"
 #include "netlink.h"
 #include "originator.h"
-#include "packet.h"
 #include "soft-interface.h"
 #include "tvlv.h"
 
@@ -86,7 +88,7 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
				  bool roaming);
 
 /**
- * batadv_compare_tt - check if two TT entries are the same
+ * batadv_compare_tt() - check if two TT entries are the same
  * @node: the list element pointer of the first TT entry
  * @data2: pointer to the tt_common_entry of the second TT entry
  *
@@ -105,7 +107,7 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
 }
 
 /**
- * batadv_choose_tt - return the index of the tt entry in the hash table
+ * batadv_choose_tt() - return the index of the tt entry in the hash table
  * @data: pointer to the tt_common_entry object to map
  * @size: the size of the hash table
  *
@@ -125,7 +127,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size)
 }
 
 /**
- * batadv_tt_hash_find - look for a client in the given hash table
+ * batadv_tt_hash_find() - look for a client in the given hash table
  * @hash: the hash table to search
 * @addr: the mac address of the client to look for
  * @vid: VLAN identifier
@@ -170,7 +172,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
 }
 
 /**
- * batadv_tt_local_hash_find - search the local table for a given client
+ * batadv_tt_local_hash_find() - search the local table for a given client
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the mac address of the client to look for
  * @vid: VLAN identifier
@@ -195,7 +197,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
 }
 
 /**
- * batadv_tt_global_hash_find - search the global table for a given client
+ * batadv_tt_global_hash_find() - search the global table for a given client
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the mac address of the client to look for
  * @vid: VLAN identifier
@@ -220,7 +222,7 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
 }
 
 /**
- * batadv_tt_local_entry_free_rcu - free the tt_local_entry
+ * batadv_tt_local_entry_free_rcu() - free the tt_local_entry
  * @rcu: rcu pointer of the tt_local_entry
  */
 static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
@@ -234,7 +236,7 @@ static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
 }
 
 /**
- * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
+ * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue
  * for free after rcu grace period
  * @ref: kref pointer of the nc_node
  */
@@ -251,7 +253,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
 }
 
 /**
- * batadv_tt_local_entry_put - decrement the tt_local_entry refcounter and
+ * batadv_tt_local_entry_put() - decrement the tt_local_entry refcounter and
  * possibly release it
  * @tt_local_entry: tt_local_entry to be free'd
  */
@@ -263,7 +265,7 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
 }
 
 /**
- * batadv_tt_global_entry_free_rcu - free the tt_global_entry
+ * batadv_tt_global_entry_free_rcu() - free the tt_global_entry
  * @rcu: rcu pointer of the tt_global_entry
  */
 static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
@@ -277,8 +279,8 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
 }
 
 /**
- * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
- * for free after rcu grace period
+ * batadv_tt_global_entry_release() - release tt_global_entry from lists and
+ * queue for free after rcu grace period
  * @ref: kref pointer of the nc_node
  */
 static void batadv_tt_global_entry_release(struct kref *ref)
@@ -294,7 +296,7 @@ static void batadv_tt_global_entry_release(struct kref *ref)
 }
 
 /**
- * batadv_tt_global_entry_put - decrement the tt_global_entry refcounter and
+ * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
  * possibly release it
  * @tt_global_entry: tt_global_entry to be free'd
  */
@@ -306,7 +308,7 @@ batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
 }
 
 /**
- * batadv_tt_global_hash_count - count the number of orig entries
+ * batadv_tt_global_hash_count() - count the number of orig entries
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the mac address of the client to count entries for
  * @vid: VLAN identifier
@@ -331,8 +333,8 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_local_size_mod - change the size by v of the local table identified
- * by vid
+ * batadv_tt_local_size_mod() - change the size by v of the local table
+ * identified by vid
  * @bat_priv: the bat priv with all the soft interface information
  * @vid: the VLAN identifier of the sub-table to change
  * @v: the amount to sum to the local table size
@@ -352,8 +354,8 @@ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_local_size_inc - increase by one the local table size for the given
- * vid
+ * batadv_tt_local_size_inc() - increase by one the local table size for the
+ * given vid
  * @bat_priv: the bat priv with all the soft interface information
  * @vid: the VLAN identifier
  */
@@ -364,8 +366,8 @@ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_local_size_dec - decrease by one the local table size for the given
- * vid
+ * batadv_tt_local_size_dec() - decrease by one the local table size for the
+ * given vid
  * @bat_priv: the bat priv with all the soft interface information
  * @vid: the VLAN identifier
  */
@@ -376,7 +378,7 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_global_size_mod - change the size by v of the global table
+ * batadv_tt_global_size_mod() - change the size by v of the global table
  * for orig_node identified by vid
  * @orig_node: the originator for which the table has to be modified
  * @vid: the VLAN identifier
@@ -404,7 +406,7 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_tt_global_size_inc - increase by one the global table size for the
+ * batadv_tt_global_size_inc() - increase by one the global table size for the
 * given vid
 * @orig_node: the originator which global table size has to be decreased
  * @vid: the vlan identifier
@@ -416,7 +418,7 @@ static void batadv_tt_global_size_inc(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_tt_global_size_dec - decrease by one the global table size for the
+ * batadv_tt_global_size_dec() - decrease by one the global table size for the
 * given vid
 * @orig_node: the originator which global table size has to be decreased
  * @vid: the vlan identifier
@@ -428,7 +430,7 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
 }
 
 /**
- * batadv_tt_orig_list_entry_free_rcu - free the orig_entry
+ * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry
  * @rcu: rcu pointer of the orig_entry
  */
 static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
@@ -441,7 +443,7 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
 }
 
 /**
- * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
+ * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and
  * queue for free after rcu grace period
  * @ref: kref pointer of the tt orig entry
  */
@@ -457,7 +459,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
 }
 
 /**
- * batadv_tt_orig_list_entry_put - decrement the tt orig entry refcounter and
+ * batadv_tt_orig_list_entry_put() - decrement the tt orig entry refcounter and
  * possibly release it
  * @orig_entry: tt orig entry to be free'd
  */
@@ -468,7 +470,7 @@ batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
 }
 
 /**
- * batadv_tt_local_event - store a local TT event (ADD/DEL)
+ * batadv_tt_local_event() - store a local TT event (ADD/DEL)
  * @bat_priv: the bat priv with all the soft interface information
  * @tt_local_entry: the TT entry involved in the event
  * @event_flags: flags to store in the event structure
@@ -543,7 +545,7 @@ unlock:
 }
 
 /**
- * batadv_tt_len - compute length in bytes of given number of tt changes
+ * batadv_tt_len() - compute length in bytes of given number of tt changes
  * @changes_num: number of tt changes
  *
  * Return: computed length in bytes.
@@ -554,7 +556,7 @@ static int batadv_tt_len(int changes_num)
 }
 
 /**
- * batadv_tt_entries - compute the number of entries fitting in tt_len bytes
+ * batadv_tt_entries() - compute the number of entries fitting in tt_len bytes
  * @tt_len: available space
  *
  * Return: the number of entries.
@@ -565,8 +567,8 @@ static u16 batadv_tt_entries(u16 tt_len)
 }
 
 /**
- * batadv_tt_local_table_transmit_size - calculates the local translation table
- * size when transmitted over the air
+ * batadv_tt_local_table_transmit_size() - calculates the local translation
+ * table size when transmitted over the air
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: local translation table size in bytes.
@@ -625,7 +627,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_local_add - add a new client to the local table or update an
+ * batadv_tt_local_add() - add a new client to the local table or update an
  * existing client
  * @soft_iface: netdev struct of the mesh interface
  * @addr: the mac address of the client to add
@@ -830,7 +832,7 @@ out:
 }
 
 /**
- * batadv_tt_prepare_tvlv_global_data - prepare the TVLV TT header to send
+ * batadv_tt_prepare_tvlv_global_data() - prepare the TVLV TT header to send
  * within a TT Response directed to another node
  * @orig_node: originator for which the TT data has to be prepared
  * @tt_data: uninitialised pointer to the address of the TVLV buffer
@@ -903,8 +905,8 @@ out:
 }
 
 /**
- * batadv_tt_prepare_tvlv_local_data - allocate and prepare the TT TVLV for this
- * node
+ * batadv_tt_prepare_tvlv_local_data() - allocate and prepare the TT TVLV for
+ * this node
  * @bat_priv: the bat priv with all the soft interface information
  * @tt_data: uninitialised pointer to the address of the TVLV buffer
  * @tt_change: uninitialised pointer to the address of the area where the TT
@@ -977,8 +979,8 @@ out:
 }
 
 /**
- * batadv_tt_tvlv_container_update - update the translation table tvlv container
- * after local tt changes have been committed
+ * batadv_tt_tvlv_container_update() - update the translation table tvlv
+ * container after local tt changes have been committed
  * @bat_priv: the bat priv with all the soft interface information
  */
 static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
@@ -1053,6 +1055,14 @@ container_register:
 }
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+/**
+ * batadv_tt_local_seq_print_text() - Print the local tt table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1123,7 +1133,7 @@ out:
 #endif
 
 /**
- * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * batadv_tt_local_dump_entry() - Dump one TT local entry into a message
  * @msg :Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -1179,7 +1189,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -1216,7 +1226,7 @@ batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 }
 
 /**
- * batadv_tt_local_dump - Dump TT local entries into a message
+ * batadv_tt_local_dump() - Dump TT local entries into a message
  * @msg: Netlink message to dump into
  * @cb: Parameters from query
  *
@@ -1300,7 +1310,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_local_remove - logically remove an entry from the local table
+ * batadv_tt_local_remove() - logically remove an entry from the local table
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the MAC address of the client to remove
  * @vid: VLAN identifier
@@ -1362,7 +1372,7 @@ out:
 }
 
 /**
- * batadv_tt_local_purge_list - purge inactive tt local entries
+ * batadv_tt_local_purge_list() - purge inactive tt local entries
  * @bat_priv: the bat priv with all the soft interface information
  * @head: pointer to the list containing the local tt entries
  * @timeout: parameter deciding whether a given tt local entry is considered
@@ -1397,7 +1407,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_tt_local_purge - purge inactive tt local entries
+ * batadv_tt_local_purge() - purge inactive tt local entries
  * @bat_priv: the bat priv with all the soft interface information
  * @timeout: parameter deciding whether a given tt local entry is considered
  * inactive or not
@@ -1490,7 +1500,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_tt_global_orig_entry_find - find a TT orig_list_entry
+ * batadv_tt_global_orig_entry_find() - find a TT orig_list_entry
  * @entry: the TT global entry where the orig_list_entry has to be
 * extracted from
 * @orig_node: the originator for which the orig_list_entry has to be found
@@ -1524,8 +1534,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
 }
 
 /**
- * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled
- * by a given originator
+ * batadv_tt_global_entry_has_orig() - check if a TT global entry is also
+ * handled by a given originator
  * @entry: the TT global entry to check
  * @orig_node: the originator to search in the list
  *
@@ -1550,7 +1560,7 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
 }
 
 /**
- * batadv_tt_global_sync_flags - update TT sync flags
+ * batadv_tt_global_sync_flags() - update TT sync flags
  * @tt_global: the TT global entry to update sync flags in
  *
  * Updates the sync flag bits in the tt_global flag attribute with a logical
@@ -1574,7 +1584,7 @@ batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global)
 }
 
 /**
- * batadv_tt_global_orig_entry_add - add or update a TT orig entry
+ * batadv_tt_global_orig_entry_add() - add or update a TT orig entry
  * @tt_global: the TT global entry to add an orig entry in
  * @orig_node: the originator to add an orig entry for
  * @ttvn: translation table version number of this changeset
@@ -1624,7 +1634,7 @@ out:
 }
 
 /**
- * batadv_tt_global_add - add a new TT global entry or update an existing one
+ * batadv_tt_global_add() - add a new TT global entry or update an existing one
  * @bat_priv: the bat priv with all the soft interface information
  * @orig_node: the originator announcing the client
  * @tt_addr: the mac address of the non-mesh client
@@ -1796,7 +1806,7 @@ out:
 }
 
 /**
- * batadv_transtable_best_orig - Get best originator list entry from tt entry
+ * batadv_transtable_best_orig() - Get best originator list entry from tt entry
  * @bat_priv: the bat priv with all the soft interface information
  * @tt_global_entry: global translation table entry to be analyzed
  *
@@ -1842,8 +1852,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
- * batadv_tt_global_print_entry - print all orig nodes who announce the address
- * for this global entry
+ * batadv_tt_global_print_entry() - print all orig nodes who announce the
+ * address for this global entry
  * @bat_priv: the bat priv with all the soft interface information
  * @tt_global_entry: global translation table entry to be printed
  * @seq: debugfs table seq_file struct
@@ -1925,6 +1935,13 @@ print_list:
 	}
 }
 
+/**
+ * batadv_tt_global_seq_print_text() - Print the global tt table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
 int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1967,7 +1984,7 @@ out:
 #endif
 
 /**
- * batadv_tt_global_dump_subentry - Dump all TT local entries into a message
+ * batadv_tt_global_dump_subentry() - Dump all TT local entries into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
  * @seq: Sequence number of netlink message
@@ -2028,7 +2045,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
2028} 2045}
2029 2046
2030/** 2047/**
2031 * batadv_tt_global_dump_entry - Dump one TT global entry into a message 2048 * batadv_tt_global_dump_entry() - Dump one TT global entry into a message
2032 * @msg: Netlink message to dump into 2049 * @msg: Netlink message to dump into
2033 * @portid: Port making netlink request 2050 * @portid: Port making netlink request
2034 * @seq: Sequence number of netlink message 2051 * @seq: Sequence number of netlink message
@@ -2073,7 +2090,7 @@ batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
2073} 2090}
2074 2091
2075/** 2092/**
2076 * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message 2093 * batadv_tt_global_dump_bucket() - Dump one TT local bucket into a message
2077 * @msg: Netlink message to dump into 2094 * @msg: Netlink message to dump into
2078 * @portid: Port making netlink request 2095 * @portid: Port making netlink request
2079 * @seq: Sequence number of netlink message 2096 * @seq: Sequence number of netlink message
@@ -2112,7 +2129,7 @@ batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
2112} 2129}
2113 2130
2114/** 2131/**
2115 * batadv_tt_global_dump - Dump TT global entries into a message 2132 * batadv_tt_global_dump() - Dump TT global entries into a message
2116 * @msg: Netlink message to dump into 2133 * @msg: Netlink message to dump into
2117 * @cb: Parameters from query 2134 * @cb: Parameters from query
2118 * 2135 *
@@ -2180,7 +2197,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
2180} 2197}
2181 2198
2182/** 2199/**
2183 * _batadv_tt_global_del_orig_entry - remove and free an orig_entry 2200 * _batadv_tt_global_del_orig_entry() - remove and free an orig_entry
2184 * @tt_global_entry: the global entry to remove the orig_entry from 2201 * @tt_global_entry: the global entry to remove the orig_entry from
2185 * @orig_entry: the orig entry to remove and free 2202 * @orig_entry: the orig entry to remove and free
2186 * 2203 *
@@ -2222,7 +2239,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
2222} 2239}
2223 2240
2224/** 2241/**
2225 * batadv_tt_global_del_orig_node - remove orig_node from a global tt entry 2242 * batadv_tt_global_del_orig_node() - remove orig_node from a global tt entry
2226 * @bat_priv: the bat priv with all the soft interface information 2243 * @bat_priv: the bat priv with all the soft interface information
2227 * @tt_global_entry: the global entry to remove the orig_node from 2244 * @tt_global_entry: the global entry to remove the orig_node from
2228 * @orig_node: the originator announcing the client 2245 * @orig_node: the originator announcing the client
@@ -2301,7 +2318,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
2301} 2318}
2302 2319
2303/** 2320/**
2304 * batadv_tt_global_del - remove a client from the global table 2321 * batadv_tt_global_del() - remove a client from the global table
2305 * @bat_priv: the bat priv with all the soft interface information 2322 * @bat_priv: the bat priv with all the soft interface information
2306 * @orig_node: an originator serving this client 2323 * @orig_node: an originator serving this client
2307 * @addr: the mac address of the client 2324 * @addr: the mac address of the client
@@ -2367,8 +2384,8 @@ out:
2367} 2384}
2368 2385
2369/** 2386/**
2370 * batadv_tt_global_del_orig - remove all the TT global entries belonging to the 2387 * batadv_tt_global_del_orig() - remove all the TT global entries belonging to
2371 * given originator matching the provided vid 2388 * the given originator matching the provided vid
2372 * @bat_priv: the bat priv with all the soft interface information 2389 * @bat_priv: the bat priv with all the soft interface information
2373 * @orig_node: the originator owning the entries to remove 2390 * @orig_node: the originator owning the entries to remove
2374 * @match_vid: the VLAN identifier to match. If negative all the entries will be 2391 * @match_vid: the VLAN identifier to match. If negative all the entries will be
@@ -2539,7 +2556,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
2539} 2556}
2540 2557
2541/** 2558/**
2542 * batadv_transtable_search - get the mesh destination for a given client 2559 * batadv_transtable_search() - get the mesh destination for a given client
2543 * @bat_priv: the bat priv with all the soft interface information 2560 * @bat_priv: the bat priv with all the soft interface information
2544 * @src: mac address of the source client 2561 * @src: mac address of the source client
2545 * @addr: mac address of the destination client 2562 * @addr: mac address of the destination client
@@ -2599,7 +2616,7 @@ out:
2599} 2616}
2600 2617
2601/** 2618/**
2602 * batadv_tt_global_crc - calculates the checksum of the local table belonging 2619 * batadv_tt_global_crc() - calculates the checksum of the local table belonging
2603 * to the given orig_node 2620 * to the given orig_node
2604 * @bat_priv: the bat priv with all the soft interface information 2621 * @bat_priv: the bat priv with all the soft interface information
2605 * @orig_node: originator for which the CRC should be computed 2622 * @orig_node: originator for which the CRC should be computed
@@ -2694,7 +2711,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
2694} 2711}
2695 2712
2696/** 2713/**
2697 * batadv_tt_local_crc - calculates the checksum of the local table 2714 * batadv_tt_local_crc() - calculates the checksum of the local table
2698 * @bat_priv: the bat priv with all the soft interface information 2715 * @bat_priv: the bat priv with all the soft interface information
2699 * @vid: VLAN identifier for which the CRC32 has to be computed 2716 * @vid: VLAN identifier for which the CRC32 has to be computed
2700 * 2717 *
@@ -2751,7 +2768,7 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
2751} 2768}
2752 2769
2753/** 2770/**
2754 * batadv_tt_req_node_release - free tt_req node entry 2771 * batadv_tt_req_node_release() - free tt_req node entry
2755 * @ref: kref pointer of the tt req_node entry 2772 * @ref: kref pointer of the tt req_node entry
2756 */ 2773 */
2757static void batadv_tt_req_node_release(struct kref *ref) 2774static void batadv_tt_req_node_release(struct kref *ref)
@@ -2764,7 +2781,7 @@ static void batadv_tt_req_node_release(struct kref *ref)
2764} 2781}
2765 2782
2766/** 2783/**
2767 * batadv_tt_req_node_put - decrement the tt_req_node refcounter and 2784 * batadv_tt_req_node_put() - decrement the tt_req_node refcounter and
2768 * possibly release it 2785 * possibly release it
2769 * @tt_req_node: tt_req_node to be free'd 2786 * @tt_req_node: tt_req_node to be free'd
2770 */ 2787 */
@@ -2826,7 +2843,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
2826} 2843}
2827 2844
2828/** 2845/**
2829 * batadv_tt_req_node_new - search and possibly create a tt_req_node object 2846 * batadv_tt_req_node_new() - search and possibly create a tt_req_node object
2830 * @bat_priv: the bat priv with all the soft interface information 2847 * @bat_priv: the bat priv with all the soft interface information
2831 * @orig_node: orig node this request is being issued for 2848 * @orig_node: orig node this request is being issued for
2832 * 2849 *
@@ -2863,7 +2880,7 @@ unlock:
2863} 2880}
2864 2881
2865/** 2882/**
2866 * batadv_tt_local_valid - verify that given tt entry is a valid one 2883 * batadv_tt_local_valid() - verify that given tt entry is a valid one
2867 * @entry_ptr: to be checked local tt entry 2884 * @entry_ptr: to be checked local tt entry
2868 * @data_ptr: not used but definition required to satisfy the callback prototype 2885 * @data_ptr: not used but definition required to satisfy the callback prototype
2869 * 2886 *
@@ -2897,7 +2914,7 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
2897} 2914}
2898 2915
2899/** 2916/**
2900 * batadv_tt_tvlv_generate - fill the tvlv buff with the tt entries from the 2917 * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the
2901 * specified tt hash 2918 * specified tt hash
2902 * @bat_priv: the bat priv with all the soft interface information 2919 * @bat_priv: the bat priv with all the soft interface information
2903 * @hash: hash table containing the tt entries 2920 * @hash: hash table containing the tt entries
@@ -2948,7 +2965,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
2948} 2965}
2949 2966
2950/** 2967/**
2951 * batadv_tt_global_check_crc - check if all the CRCs are correct 2968 * batadv_tt_global_check_crc() - check if all the CRCs are correct
2952 * @orig_node: originator for which the CRCs have to be checked 2969 * @orig_node: originator for which the CRCs have to be checked
2953 * @tt_vlan: pointer to the first tvlv VLAN entry 2970 * @tt_vlan: pointer to the first tvlv VLAN entry
2954 * @num_vlan: number of tvlv VLAN entries 2971 * @num_vlan: number of tvlv VLAN entries
@@ -3005,7 +3022,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
3005} 3022}
3006 3023
3007/** 3024/**
3008 * batadv_tt_local_update_crc - update all the local CRCs 3025 * batadv_tt_local_update_crc() - update all the local CRCs
3009 * @bat_priv: the bat priv with all the soft interface information 3026 * @bat_priv: the bat priv with all the soft interface information
3010 */ 3027 */
3011static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) 3028static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv)
@@ -3021,7 +3038,7 @@ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv)
3021} 3038}
3022 3039
3023/** 3040/**
3024 * batadv_tt_global_update_crc - update all the global CRCs for this orig_node 3041 * batadv_tt_global_update_crc() - update all the global CRCs for this orig_node
3025 * @bat_priv: the bat priv with all the soft interface information 3042 * @bat_priv: the bat priv with all the soft interface information
3026 * @orig_node: the orig_node for which the CRCs have to be updated 3043 * @orig_node: the orig_node for which the CRCs have to be updated
3027 */ 3044 */
@@ -3048,7 +3065,7 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
3048} 3065}
3049 3066
3050/** 3067/**
3051 * batadv_send_tt_request - send a TT Request message to a given node 3068 * batadv_send_tt_request() - send a TT Request message to a given node
3052 * @bat_priv: the bat priv with all the soft interface information 3069 * @bat_priv: the bat priv with all the soft interface information
3053 * @dst_orig_node: the destination of the message 3070 * @dst_orig_node: the destination of the message
3054 * @ttvn: the version number that the source of the message is looking for 3071 * @ttvn: the version number that the source of the message is looking for
@@ -3137,7 +3154,7 @@ out:
3137} 3154}
3138 3155
3139/** 3156/**
3140 * batadv_send_other_tt_response - send reply to tt request concerning another 3157 * batadv_send_other_tt_response() - send reply to tt request concerning another
3141 * node's translation table 3158 * node's translation table
3142 * @bat_priv: the bat priv with all the soft interface information 3159 * @bat_priv: the bat priv with all the soft interface information
3143 * @tt_data: tt data containing the tt request information 3160 * @tt_data: tt data containing the tt request information
@@ -3270,8 +3287,8 @@ out:
3270} 3287}
3271 3288
3272/** 3289/**
3273 * batadv_send_my_tt_response - send reply to tt request concerning this node's 3290 * batadv_send_my_tt_response() - send reply to tt request concerning this
3274 * translation table 3291 * node's translation table
3275 * @bat_priv: the bat priv with all the soft interface information 3292 * @bat_priv: the bat priv with all the soft interface information
3276 * @tt_data: tt data containing the tt request information 3293 * @tt_data: tt data containing the tt request information
3277 * @req_src: mac address of tt request sender 3294 * @req_src: mac address of tt request sender
@@ -3388,7 +3405,7 @@ out:
3388} 3405}
3389 3406
3390/** 3407/**
3391 * batadv_send_tt_response - send reply to tt request 3408 * batadv_send_tt_response() - send reply to tt request
3392 * @bat_priv: the bat priv with all the soft interface information 3409 * @bat_priv: the bat priv with all the soft interface information
3393 * @tt_data: tt data containing the tt request information 3410 * @tt_data: tt data containing the tt request information
3394 * @req_src: mac address of tt request sender 3411 * @req_src: mac address of tt request sender
@@ -3484,7 +3501,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
3484} 3501}
3485 3502
3486/** 3503/**
3487 * batadv_is_my_client - check if a client is served by the local node 3504 * batadv_is_my_client() - check if a client is served by the local node
3488 * @bat_priv: the bat priv with all the soft interface information 3505 * @bat_priv: the bat priv with all the soft interface information
3489 * @addr: the mac address of the client to check 3506 * @addr: the mac address of the client to check
3490 * @vid: VLAN identifier 3507 * @vid: VLAN identifier
@@ -3514,7 +3531,7 @@ out:
3514} 3531}
3515 3532
3516/** 3533/**
3517 * batadv_handle_tt_response - process incoming tt reply 3534 * batadv_handle_tt_response() - process incoming tt reply
3518 * @bat_priv: the bat priv with all the soft interface information 3535 * @bat_priv: the bat priv with all the soft interface information
3519 * @tt_data: tt data containing the tt request information 3536 * @tt_data: tt data containing the tt request information
3520 * @resp_src: mac address of tt reply sender 3537 * @resp_src: mac address of tt reply sender
@@ -3607,7 +3624,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
3607} 3624}
3608 3625
3609/** 3626/**
3610 * batadv_tt_check_roam_count - check if a client has roamed too frequently 3627 * batadv_tt_check_roam_count() - check if a client has roamed too frequently
3611 * @bat_priv: the bat priv with all the soft interface information 3628 * @bat_priv: the bat priv with all the soft interface information
3612 * @client: mac address of the roaming client 3629 * @client: mac address of the roaming client
3613 * 3630 *
@@ -3662,7 +3679,7 @@ unlock:
3662} 3679}
3663 3680
3664/** 3681/**
3665 * batadv_send_roam_adv - send a roaming advertisement message 3682 * batadv_send_roam_adv() - send a roaming advertisement message
3666 * @bat_priv: the bat priv with all the soft interface information 3683 * @bat_priv: the bat priv with all the soft interface information
3667 * @client: mac address of the roaming client 3684 * @client: mac address of the roaming client
3668 * @vid: VLAN identifier 3685 * @vid: VLAN identifier
@@ -3727,6 +3744,10 @@ static void batadv_tt_purge(struct work_struct *work)
3727 msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); 3744 msecs_to_jiffies(BATADV_TT_WORK_PERIOD));
3728} 3745}
3729 3746
3747/**
3748 * batadv_tt_free() - Free translation table of soft interface
3749 * @bat_priv: the bat priv with all the soft interface information
3750 */
3730void batadv_tt_free(struct batadv_priv *bat_priv) 3751void batadv_tt_free(struct batadv_priv *bat_priv)
3731{ 3752{
3732 batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1); 3753 batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1);
@@ -3744,7 +3765,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv)
3744} 3765}
3745 3766
3746/** 3767/**
3747 * batadv_tt_local_set_flags - set or unset the specified flags on the local 3768 * batadv_tt_local_set_flags() - set or unset the specified flags on the local
3748 * table and possibly count them in the TT size 3769 * table and possibly count them in the TT size
3749 * @bat_priv: the bat priv with all the soft interface information 3770 * @bat_priv: the bat priv with all the soft interface information
3750 * @flags: the flag to switch 3771 * @flags: the flag to switch
@@ -3830,7 +3851,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
3830} 3851}
3831 3852
3832/** 3853/**
3833 * batadv_tt_local_commit_changes_nolock - commit all pending local tt changes 3854 * batadv_tt_local_commit_changes_nolock() - commit all pending local tt changes
3834 * which have been queued in the time since the last commit 3855 * which have been queued in the time since the last commit
3835 * @bat_priv: the bat priv with all the soft interface information 3856 * @bat_priv: the bat priv with all the soft interface information
3836 * 3857 *
@@ -3863,7 +3884,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
3863} 3884}
3864 3885
3865/** 3886/**
3866 * batadv_tt_local_commit_changes - commit all pending local tt changes which 3887 * batadv_tt_local_commit_changes() - commit all pending local tt changes which
3867 * have been queued in the time since the last commit 3888 * have been queued in the time since the last commit
3868 * @bat_priv: the bat priv with all the soft interface information 3889 * @bat_priv: the bat priv with all the soft interface information
3869 */ 3890 */
@@ -3874,6 +3895,15 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
3874 spin_unlock_bh(&bat_priv->tt.commit_lock); 3895 spin_unlock_bh(&bat_priv->tt.commit_lock);
3875} 3896}
3876 3897
3898/**
3899 * batadv_is_ap_isolated() - Check if packet from upper layer should be dropped
3900 * @bat_priv: the bat priv with all the soft interface information
3901 * @src: source mac address of packet
3902 * @dst: destination mac address of packet
3903 * @vid: vlan id of packet
3904 *
3905 * Return: true when src+dst(+vid) pair should be isolated, false otherwise
3906 */
3877bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, 3907bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
3878 unsigned short vid) 3908 unsigned short vid)
3879{ 3909{
@@ -3909,7 +3939,7 @@ vlan_put:
3909} 3939}
3910 3940
3911/** 3941/**
3912 * batadv_tt_update_orig - update global translation table with new tt 3942 * batadv_tt_update_orig() - update global translation table with new tt
3913 * information received via ogms 3943 * information received via ogms
3914 * @bat_priv: the bat priv with all the soft interface information 3944 * @bat_priv: the bat priv with all the soft interface information
3915 * @orig_node: the orig_node of the ogm 3945 * @orig_node: the orig_node of the ogm
@@ -3994,7 +4024,7 @@ request_table:
3994} 4024}
3995 4025
3996/** 4026/**
3997 * batadv_tt_global_client_is_roaming - check if a client is marked as roaming 4027 * batadv_tt_global_client_is_roaming() - check if a client is marked as roaming
3998 * @bat_priv: the bat priv with all the soft interface information 4028 * @bat_priv: the bat priv with all the soft interface information
3999 * @addr: the mac address of the client to check 4029 * @addr: the mac address of the client to check
4000 * @vid: VLAN identifier 4030 * @vid: VLAN identifier
@@ -4020,7 +4050,7 @@ out:
4020} 4050}
4021 4051
4022/** 4052/**
4023 * batadv_tt_local_client_is_roaming - tells whether the client is roaming 4053 * batadv_tt_local_client_is_roaming() - tells whether the client is roaming
4024 * @bat_priv: the bat priv with all the soft interface information 4054 * @bat_priv: the bat priv with all the soft interface information
4025 * @addr: the mac address of the local client to query 4055 * @addr: the mac address of the local client to query
4026 * @vid: VLAN identifier 4056 * @vid: VLAN identifier
@@ -4045,6 +4075,15 @@ out:
4045 return ret; 4075 return ret;
4046} 4076}
4047 4077
4078/**
4079 * batadv_tt_add_temporary_global_entry() - Add temporary entry to global TT
4080 * @bat_priv: the bat priv with all the soft interface information
4081 * @orig_node: orig node which the temporary entry should be associated with
4082 * @addr: mac address of the client
4083 * @vid: VLAN id of the new temporary global translation table
4084 *
4085 * Return: true when temporary tt entry could be added, false otherwise
4086 */
4048bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, 4087bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
4049 struct batadv_orig_node *orig_node, 4088 struct batadv_orig_node *orig_node,
4050 const unsigned char *addr, 4089 const unsigned char *addr,
@@ -4069,7 +4108,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
4069} 4108}
4070 4109
4071/** 4110/**
4072 * batadv_tt_local_resize_to_mtu - resize the local translation table fit the 4111 * batadv_tt_local_resize_to_mtu() - resize the local translation table fit the
4073 * maximum packet size that can be transported through the mesh 4112 * maximum packet size that can be transported through the mesh
4074 * @soft_iface: netdev struct of the mesh interface 4113 * @soft_iface: netdev struct of the mesh interface
4075 * 4114 *
@@ -4110,7 +4149,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
4110} 4149}
4111 4150
4112/** 4151/**
4113 * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container 4152 * batadv_tt_tvlv_ogm_handler_v1() - process incoming tt tvlv container
4114 * @bat_priv: the bat priv with all the soft interface information 4153 * @bat_priv: the bat priv with all the soft interface information
4115 * @orig: the orig_node of the ogm 4154 * @orig: the orig_node of the ogm
4116 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) 4155 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -4149,7 +4188,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
4149} 4188}
4150 4189
4151/** 4190/**
4152 * batadv_tt_tvlv_unicast_handler_v1 - process incoming (unicast) tt tvlv 4191 * batadv_tt_tvlv_unicast_handler_v1() - process incoming (unicast) tt tvlv
4153 * container 4192 * container
4154 * @bat_priv: the bat priv with all the soft interface information 4193 * @bat_priv: the bat priv with all the soft interface information
4155 * @src: mac address of tt tvlv sender 4194 * @src: mac address of tt tvlv sender
@@ -4231,7 +4270,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
4231} 4270}
4232 4271
4233/** 4272/**
4234 * batadv_roam_tvlv_unicast_handler_v1 - process incoming tt roam tvlv container 4273 * batadv_roam_tvlv_unicast_handler_v1() - process incoming tt roam tvlv
4274 * container
4235 * @bat_priv: the bat priv with all the soft interface information 4275 * @bat_priv: the bat priv with all the soft interface information
4236 * @src: mac address of tt tvlv sender 4276 * @src: mac address of tt tvlv sender
4237 * @dst: mac address of tt tvlv recipient 4277 * @dst: mac address of tt tvlv recipient
@@ -4281,7 +4321,7 @@ out:
4281} 4321}
4282 4322
4283/** 4323/**
4284 * batadv_tt_init - initialise the translation table internals 4324 * batadv_tt_init() - initialise the translation table internals
4285 * @bat_priv: the bat priv with all the soft interface information 4325 * @bat_priv: the bat priv with all the soft interface information
4286 * 4326 *
4287 * Return: 0 on success or negative error number in case of failure. 4327 * Return: 0 on success or negative error number in case of failure.
@@ -4317,7 +4357,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
4317} 4357}
4318 4358
4319/** 4359/**
4320 * batadv_tt_global_is_isolated - check if a client is marked as isolated 4360 * batadv_tt_global_is_isolated() - check if a client is marked as isolated
4321 * @bat_priv: the bat priv with all the soft interface information 4361 * @bat_priv: the bat priv with all the soft interface information
4322 * @addr: the mac address of the client 4362 * @addr: the mac address of the client
4323 * @vid: the identifier of the VLAN where this client is connected 4363 * @vid: the identifier of the VLAN where this client is connected
@@ -4343,7 +4383,7 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
4343} 4383}
4344 4384
4345/** 4385/**
4346 * batadv_tt_cache_init - Initialize tt memory object cache 4386 * batadv_tt_cache_init() - Initialize tt memory object cache
4347 * 4387 *
4348 * Return: 0 on success or negative error number in case of failure. 4388 * Return: 0 on success or negative error number in case of failure.
4349 */ 4389 */
@@ -4412,7 +4452,7 @@ err_tt_tl_destroy:
4412} 4452}
4413 4453
4414/** 4454/**
4415 * batadv_tt_cache_destroy - Destroy tt memory object cache 4455 * batadv_tt_cache_destroy() - Destroy tt memory object cache
4416 */ 4456 */
4417void batadv_tt_cache_destroy(void) 4457void batadv_tt_cache_destroy(void)
4418{ 4458{
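The change repeated throughout translation-table.c above is purely a documentation convention, not a functional one: every kernel-doc header gains "()" after the function name so the tooling renders the name unambiguously as a function. A minimal sketch of the convention on a hypothetical helper (batadv_example_count() is not part of this patch):

	/**
	 * batadv_example_count() - count the entries of a hypothetical tt list
	 * @bat_priv: the bat priv with all the soft interface information
	 *
	 * Return: number of entries found
	 */
	static int batadv_example_count(struct batadv_priv *bat_priv)
	{
		return 0;
	}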
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 411d586191da..8d9e3abec2c8 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
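The new first line here, and the matching one added to tvlv.c below, follow the kernel's SPDX convention: the license identifier sits on line 1 of every file, written as a /* */ comment in headers and as a C99 // comment in C source files. A sketch of both forms (file names are only examples):

	/* first line of a header (*.h): */
	/* SPDX-License-Identifier: GPL-2.0 */

	/* first line of a source file (*.c) uses the C99 comment style: */
	// SPDX-License-Identifier: GPL-2.0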
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 1d9e267caec9..5ffcb45ac6ff 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -19,7 +20,7 @@

 #include <linux/byteorder/generic.h>
 #include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
@@ -35,14 +36,14 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>

 #include "originator.h"
-#include "packet.h"
 #include "send.h"
 #include "tvlv.h"

 /**
- * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ * batadv_tvlv_handler_release() - release tvlv handler from lists and queue for
  * free after rcu grace period
  * @ref: kref pointer of the tvlv
  */
@@ -55,7 +56,7 @@ static void batadv_tvlv_handler_release(struct kref *ref)
 }

 /**
- * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
+ * batadv_tvlv_handler_put() - decrement the tvlv container refcounter and
  * possibly release it
  * @tvlv_handler: the tvlv handler to free
  */
@@ -65,7 +66,7 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
 }

 /**
- * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ * batadv_tvlv_handler_get() - retrieve tvlv handler from the tvlv handler list
  * based on the provided type and version (both need to match)
  * @bat_priv: the bat priv with all the soft interface information
  * @type: tvlv handler type to look for
@@ -99,7 +100,7 @@ batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
 }

 /**
- * batadv_tvlv_container_release - release tvlv from lists and free
+ * batadv_tvlv_container_release() - release tvlv from lists and free
  * @ref: kref pointer of the tvlv
  */
 static void batadv_tvlv_container_release(struct kref *ref)
@@ -111,7 +112,7 @@ static void batadv_tvlv_container_release(struct kref *ref)
 }

 /**
- * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ * batadv_tvlv_container_put() - decrement the tvlv container refcounter and
  * possibly release it
  * @tvlv: the tvlv container to free
  */
@@ -121,7 +122,7 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
 }

 /**
- * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ * batadv_tvlv_container_get() - retrieve tvlv container from the tvlv container
  * list based on the provided type and version (both need to match)
  * @bat_priv: the bat priv with all the soft interface information
  * @type: tvlv container type to look for
@@ -155,7 +156,7 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
 }

 /**
- * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ * batadv_tvlv_container_list_size() - calculate the size of the tvlv container
  * list entries
  * @bat_priv: the bat priv with all the soft interface information
  *
@@ -180,8 +181,8 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
 }

 /**
- * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
- * list
+ * batadv_tvlv_container_remove() - remove tvlv container from the tvlv
+ * container list
  * @bat_priv: the bat priv with all the soft interface information
  * @tvlv: the to be removed tvlv container
  *
@@ -204,7 +205,7 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ * batadv_tvlv_container_unregister() - unregister tvlv container based on the
  * provided type and version (both need to match)
  * @bat_priv: the bat priv with all the soft interface information
  * @type: tvlv container type to unregister
@@ -222,7 +223,7 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_container_register - register tvlv type, version and content
+ * batadv_tvlv_container_register() - register tvlv type, version and content
  * to be propagated with each (primary interface) OGM
  * @bat_priv: the bat priv with all the soft interface information
  * @type: tvlv container type
@@ -267,7 +268,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
+ * batadv_tvlv_realloc_packet_buff() - reallocate packet buffer to accommodate
  * requested packet size
  * @packet_buff: packet buffer
  * @packet_buff_len: packet buffer size
@@ -300,7 +301,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
 }

 /**
- * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ * batadv_tvlv_container_ogm_append() - append tvlv container content to given
  * OGM packet buffer
  * @bat_priv: the bat priv with all the soft interface information
  * @packet_buff: ogm packet buffer
@@ -353,7 +354,7 @@ end:
 }

 /**
- * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
+ * batadv_tvlv_call_handler() - parse the given tvlv buffer to call the
  * appropriate handlers
  * @bat_priv: the bat priv with all the soft interface information
  * @tvlv_handler: tvlv callback function handling the tvlv content
@@ -407,7 +408,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
+ * batadv_tvlv_containers_process() - parse the given tvlv buffer to call the
  * appropriate handlers
  * @bat_priv: the bat priv with all the soft interface information
  * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
@@ -474,7 +475,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
+ * batadv_tvlv_ogm_receive() - process an incoming ogm and call the appropriate
  * handlers
  * @bat_priv: the bat priv with all the soft interface information
  * @batadv_ogm_packet: ogm packet containing the tvlv containers
@@ -501,7 +502,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_handler_register - register tvlv handler based on the provided
+ * batadv_tvlv_handler_register() - register tvlv handler based on the provided
  * type and version (both need to match) for ogm tvlv payload and/or unicast
  * payload
  * @bat_priv: the bat priv with all the soft interface information
@@ -556,7 +557,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
+ * batadv_tvlv_handler_unregister() - unregister tvlv handler based on the
  * provided type and version (both need to match)
  * @bat_priv: the bat priv with all the soft interface information
  * @type: tvlv handler type to be unregistered
@@ -579,7 +580,7 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
 }

 /**
- * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
+ * batadv_tvlv_unicast_send() - send a unicast packet with tvlv payload to the
  * specified host
  * @bat_priv: the bat priv with all the soft interface information
  * @src: source mac address of the unicast packet
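The release/put pairs documented in this file all follow the same kref pattern: a _put() helper drops one reference via kref_put(), and the release callback runs only once the counter reaches zero. A self-contained sketch of the pattern with a hypothetical object (struct example_obj is not a batman-adv type):

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct example_obj {
		struct kref refcount;
	};

	/* called by kref_put() once the last reference has been dropped */
	static void example_obj_release(struct kref *ref)
	{
		struct example_obj *obj;

		obj = container_of(ref, struct example_obj, refcount);
		kfree(obj);
	}

	/* decrement the refcounter and possibly release the object */
	static void example_obj_put(struct example_obj *obj)
	{
		kref_put(&obj->refcount, example_obj_release);
	}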
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 4d01400ada30..a74df33f446d 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a62795868794..bb1578410e0c 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
@@ -34,10 +35,9 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>

-#include "packet.h"
-
 struct seq_file;

 #ifdef CONFIG_BATMAN_ADV_DAT
@@ -54,13 +54,15 @@ struct seq_file;

 /**
  * enum batadv_dhcp_recipient - dhcp destination
- * @BATADV_DHCP_NO: packet is not a dhcp message
- * @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server
- * @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client
  */
 enum batadv_dhcp_recipient {
+	/** @BATADV_DHCP_NO: packet is not a dhcp message */
 	BATADV_DHCP_NO = 0,
+
+	/** @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server */
 	BATADV_DHCP_TO_SERVER,
+
+	/** @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client */
 	BATADV_DHCP_TO_CLIENT,
 };

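The hunk above shows the conversion applied throughout types.h: member descriptions move out of the list at the top of the kernel-doc block and become inline /** @MEMBER: ... */ comments directly above each member, separated by blank lines. The same pattern on a hypothetical enum (enum example_dhcp_dir is illustrative only):

	/**
	 * enum example_dhcp_dir - direction of a hypothetical dhcp message
	 */
	enum example_dhcp_dir {
		/** @EXAMPLE_DHCP_IN: message was received from the mesh */
		EXAMPLE_DHCP_IN = 0,

		/** @EXAMPLE_DHCP_OUT: message will be sent into the mesh */
		EXAMPLE_DHCP_OUT,
	};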
@@ -78,196 +80,274 @@ enum batadv_dhcp_recipient {

 /**
  * struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data
- * @ogm_buff: buffer holding the OGM packet
- * @ogm_buff_len: length of the OGM packet buffer
- * @ogm_seqno: OGM sequence number - used to identify each OGM
  */
 struct batadv_hard_iface_bat_iv {
+	/** @ogm_buff: buffer holding the OGM packet */
 	unsigned char *ogm_buff;
+
+	/** @ogm_buff_len: length of the OGM packet buffer */
 	int ogm_buff_len;
+
+	/** @ogm_seqno: OGM sequence number - used to identify each OGM */
 	atomic_t ogm_seqno;
 };

 /**
  * enum batadv_v_hard_iface_flags - interface flags useful to B.A.T.M.A.N. V
- * @BATADV_FULL_DUPLEX: tells if the connection over this link is full-duplex
- * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that no
- * throughput data is available for this interface and that default values are
- * assumed.
  */
 enum batadv_v_hard_iface_flags {
+	/**
+	 * @BATADV_FULL_DUPLEX: tells if the connection over this link is
+	 * full-duplex
+	 */
 	BATADV_FULL_DUPLEX = BIT(0),
+
+	/**
+	 * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that
+	 * no throughput data is available for this interface and that default
+	 * values are assumed.
+	 */
 	BATADV_WARNING_DEFAULT = BIT(1),
 };

 /**
  * struct batadv_hard_iface_bat_v - per hard-interface B.A.T.M.A.N. V data
- * @elp_interval: time interval between two ELP transmissions
- * @elp_seqno: current ELP sequence number
- * @elp_skb: base skb containing the ELP message to send
- * @elp_wq: workqueue used to schedule ELP transmissions
- * @throughput_override: throughput override to disable link auto-detection
- * @flags: interface specific flags
  */
 struct batadv_hard_iface_bat_v {
+	/** @elp_interval: time interval between two ELP transmissions */
 	atomic_t elp_interval;
+
+	/** @elp_seqno: current ELP sequence number */
 	atomic_t elp_seqno;
+
+	/** @elp_skb: base skb containing the ELP message to send */
 	struct sk_buff *elp_skb;
+
+	/** @elp_wq: workqueue used to schedule ELP transmissions */
 	struct delayed_work elp_wq;
+
+	/**
+	 * @throughput_override: throughput override to disable link
+	 * auto-detection
+	 */
 	atomic_t throughput_override;
+
+	/** @flags: interface specific flags */
 	u8 flags;
 };

 /**
  * enum batadv_hard_iface_wifi_flags - Flags describing the wifi configuration
  * of a batadv_hard_iface
- * @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device
- * @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device
- * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
- * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi device
  */
 enum batadv_hard_iface_wifi_flags {
+	/** @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device */
 	BATADV_HARDIF_WIFI_WEXT_DIRECT = BIT(0),
+
+	/** @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device */
 	BATADV_HARDIF_WIFI_CFG80211_DIRECT = BIT(1),
+
+	/**
+	 * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
+	 */
 	BATADV_HARDIF_WIFI_WEXT_INDIRECT = BIT(2),
+
+	/**
+	 * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi
+	 * device
+	 */
 	BATADV_HARDIF_WIFI_CFG80211_INDIRECT = BIT(3),
 };

 /**
  * struct batadv_hard_iface - network device known to batman-adv
- * @list: list node for batadv_hardif_list
- * @if_num: identificator of the interface
- * @if_status: status of the interface for batman-adv
- * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
- * @wifi_flags: flags whether this is (directly or indirectly) a wifi interface
- * @net_dev: pointer to the net_device
- * @hardif_obj: kobject of the per interface sysfs "mesh" directory
- * @refcount: number of contexts the object is used
- * @batman_adv_ptype: packet type describing packets that should be processed by
- * batman-adv for this interface
- * @soft_iface: the batman-adv interface which uses this network interface
- * @rcu: struct used for freeing in an RCU-safe manner
- * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
- * @bat_v: per hard-interface B.A.T.M.A.N. V data
- * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
- * @neigh_list: list of unique single hop neighbors via this interface
- * @neigh_list_lock: lock protecting neigh_list
  */
 struct batadv_hard_iface {
+	/** @list: list node for batadv_hardif_list */
 	struct list_head list;
+
+	/** @if_num: identificator of the interface */
 	s16 if_num;
+
+	/** @if_status: status of the interface for batman-adv */
 	char if_status;
+
+	/**
+	 * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
+	 */
 	u8 num_bcasts;
+
+	/**
+	 * @wifi_flags: flags whether this is (directly or indirectly) a wifi
+	 * interface
+	 */
 	u32 wifi_flags;
+
+	/** @net_dev: pointer to the net_device */
 	struct net_device *net_dev;
+
+	/** @hardif_obj: kobject of the per interface sysfs "mesh" directory */
 	struct kobject *hardif_obj;
+
+	/** @refcount: number of contexts the object is used */
 	struct kref refcount;
+
+	/**
+	 * @batman_adv_ptype: packet type describing packets that should be
+	 * processed by batman-adv for this interface
+	 */
 	struct packet_type batman_adv_ptype;
+
+	/**
+	 * @soft_iface: the batman-adv interface which uses this network
+	 * interface
+	 */
 	struct net_device *soft_iface;
+
+	/** @rcu: struct used for freeing in an RCU-safe manner */
 	struct rcu_head rcu;
+
+	/** @bat_iv: per hard-interface B.A.T.M.A.N. IV data */
 	struct batadv_hard_iface_bat_iv bat_iv;
+
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	/** @bat_v: per hard-interface B.A.T.M.A.N. V data */
 	struct batadv_hard_iface_bat_v bat_v;
 #endif
+
+	/**
+	 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
+	 */
 	struct dentry *debug_dir;
+
+	/**
+	 * @neigh_list: list of unique single hop neighbors via this interface
+	 */
 	struct hlist_head neigh_list;
-	/* neigh_list_lock protects: neigh_list */
+
+	/** @neigh_list_lock: lock protecting neigh_list */
 	spinlock_t neigh_list_lock;
 };

 /**
  * struct batadv_orig_ifinfo - originator info per outgoing interface
- * @list: list node for orig_node::ifinfo_list
- * @if_outgoing: pointer to outgoing hard-interface
- * @router: router that should be used to reach this originator
- * @last_real_seqno: last and best known sequence number
- * @last_ttl: ttl of last received packet
- * @last_seqno_forwarded: seqno of the OGM which was forwarded last
- * @batman_seqno_reset: time when the batman seqno window was reset
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
  */
 struct batadv_orig_ifinfo {
+	/** @list: list node for &batadv_orig_node.ifinfo_list */
 	struct hlist_node list;
+
+	/** @if_outgoing: pointer to outgoing hard-interface */
 	struct batadv_hard_iface *if_outgoing;
-	struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
+
+	/** @router: router that should be used to reach this originator */
+	struct batadv_neigh_node __rcu *router;
+
+	/** @last_real_seqno: last and best known sequence number */
 	u32 last_real_seqno;
+
+	/** @last_ttl: ttl of last received packet */
 	u8 last_ttl;
+
+	/** @last_seqno_forwarded: seqno of the OGM which was forwarded last */
 	u32 last_seqno_forwarded;
+
+	/** @batman_seqno_reset: time when the batman seqno window was reset */
 	unsigned long batman_seqno_reset;
+
+	/** @refcount: number of contexts the object is used */
 	struct kref refcount;
+
+	/** @rcu: struct used for freeing in an RCU-safe manner */
 	struct rcu_head rcu;
 };

 /**
  * struct batadv_frag_table_entry - head in the fragment buffer table
- * @fragment_list: head of list with fragments
- * @lock: lock to protect the list of fragments
- * @timestamp: time (jiffie) of last received fragment
- * @seqno: sequence number of the fragments in the list
- * @size: accumulated size of packets in list
- * @total_size: expected size of the assembled packet
  */
 struct batadv_frag_table_entry {
+	/** @fragment_list: head of list with fragments */
 	struct hlist_head fragment_list;
-	spinlock_t lock; /* protects fragment_list */
+
+	/** @lock: lock to protect the list of fragments */
+	spinlock_t lock;
+
+	/** @timestamp: time (jiffie) of last received fragment */
 	unsigned long timestamp;
+
+	/** @seqno: sequence number of the fragments in the list */
 	u16 seqno;
+
+	/** @size: accumulated size of packets in list */
 	u16 size;
+
+	/** @total_size: expected size of the assembled packet */
 	u16 total_size;
 };

 /**
  * struct batadv_frag_list_entry - entry in a list of fragments
- * @list: list node information
- * @skb: fragment
- * @no: fragment number in the set
  */
 struct batadv_frag_list_entry {
+	/** @list: list node information */
 	struct hlist_node list;
+
+	/** @skb: fragment */
 	struct sk_buff *skb;
+
+	/** @no: fragment number in the set */
 	u8 no;
 };

 /**
  * struct batadv_vlan_tt - VLAN specific TT attributes
- * @crc: CRC32 checksum of the entries belonging to this vlan
- * @num_entries: number of TT entries for this VLAN
  */
 struct batadv_vlan_tt {
+	/** @crc: CRC32 checksum of the entries belonging to this vlan */
 	u32 crc;
+
+	/** @num_entries: number of TT entries for this VLAN */
 	atomic_t num_entries;
 };

 /**
  * struct batadv_orig_node_vlan - VLAN specific data per orig_node
- * @vid: the VLAN identifier
- * @tt: VLAN specific TT attributes
- * @list: list node for orig_node::vlan_list
- * @refcount: number of context where this object is currently in use
- * @rcu: struct used for freeing in a RCU-safe manner
  */
 struct batadv_orig_node_vlan {
+	/** @vid: the VLAN identifier */
 	unsigned short vid;
+
+	/** @tt: VLAN specific TT attributes */
 	struct batadv_vlan_tt tt;
+
+	/** @list: list node for &batadv_orig_node.vlan_list */
 	struct hlist_node list;
+
+	/**
+	 * @refcount: number of context where this object is currently in use
+	 */
 	struct kref refcount;
+
+	/** @rcu: struct used for freeing in a RCU-safe manner */
 	struct rcu_head rcu;
 };

 /**
  * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
- * @bcast_own: set of bitfields (one per hard-interface) where each one counts
- * the number of our OGMs this orig_node rebroadcasted "back" to us (relative
- * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
- * @bcast_own_sum: sum of bcast_own
- * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
- * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
  */
 struct batadv_orig_bat_iv {
+	/**
+	 * @bcast_own: set of bitfields (one per hard-interface) where each one
+	 * counts the number of our OGMs this orig_node rebroadcasted "back" to
+	 * us (relative to last_real_seqno). Every bitfield is
+	 * BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
+	 */
 	unsigned long *bcast_own;
+
+	/** @bcast_own_sum: sum of bcast_own */
 	u8 *bcast_own_sum;
-	/* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
+
+	/**
+	 * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
 	 * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
 	 */
 	spinlock_t ogm_cnt_lock;
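Struct members get the same inline treatment; a description that does not fit within the line limit falls back to the multi-line /** ... */ form, as sketched here on a hypothetical struct (struct example_counter is illustrative only):

	/**
	 * struct example_counter - hypothetical per-interface counter
	 */
	struct example_counter {
		/** @value: current counter value */
		atomic_t value;

		/**
		 * @lock: protects @value against concurrent updates from the
		 * periodic worker
		 */
		spinlock_t lock;
	};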
@@ -275,130 +355,205 @@ struct batadv_orig_bat_iv {
275 355
276/** 356/**
277 * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh 357 * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
278 * @orig: originator ethernet address
279 * @ifinfo_list: list for routers per outgoing interface
280 * @last_bonding_candidate: pointer to last ifinfo of last used router
281 * @dat_addr: address of the orig node in the distributed hash
282 * @last_seen: time when last packet from this node was received
283 * @bcast_seqno_reset: time when the broadcast seqno window was reset
284 * @mcast_handler_lock: synchronizes mcast-capability and -flag changes
285 * @mcast_flags: multicast flags announced by the orig node
286 * @mcast_want_all_unsnoopables_node: a list node for the
287 * mcast.want_all_unsnoopables list
288 * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list
289 * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list
290 * @capabilities: announced capabilities of this originator
291 * @capa_initialized: bitfield to remember whether a capability was initialized
292 * @last_ttvn: last seen translation table version number
293 * @tt_buff: last tt changeset this node received from the orig node
294 * @tt_buff_len: length of the last tt changeset this node received from the
295 * orig node
296 * @tt_buff_lock: lock that protects tt_buff and tt_buff_len
297 * @tt_lock: prevents updating the table while reading it. A table update is
298 * made up of two operations (data structure update and metadata -CRC/TTVN-
299 * recalculation) which have to be executed atomically so that no other
300 * thread can read the table/metadata in between.
301 * @bcast_bits: bitfield containing the info which payload broadcast originated
302 * from this orig node this host already has seen (relative to
303 * last_bcast_seqno)
304 * @last_bcast_seqno: last broadcast sequence number received by this host
305 * @neigh_list: list of potential next hop neighbor towards this orig node
306 * @neigh_list_lock: lock protecting neigh_list and router
307 * @hash_entry: hlist node for batadv_priv::orig_hash
308 * @bat_priv: pointer to soft_iface this orig node belongs to
309 * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno
310 * @refcount: number of contexts the object is used in
311 * @rcu: struct used for freeing in an RCU-safe manner
312 * @in_coding_list: list of nodes this orig can hear
313 * @out_coding_list: list of nodes that can hear this orig
314 * @in_coding_list_lock: protects in_coding_list
315 * @out_coding_list_lock: protects out_coding_list
316 * @fragments: array with heads for fragment chains
317 * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by the
318 * originator represented by this object
319 * @vlan_list_lock: lock protecting vlan_list
320 * @bat_iv: B.A.T.M.A.N. IV private structure
321 */ 358 */
322struct batadv_orig_node { 359struct batadv_orig_node {
360 /** @orig: originator ethernet address */
323 u8 orig[ETH_ALEN]; 361 u8 orig[ETH_ALEN];
362
363 /** @ifinfo_list: list for routers per outgoing interface */
324 struct hlist_head ifinfo_list; 364 struct hlist_head ifinfo_list;
365
366 /**
367 * @last_bonding_candidate: pointer to last ifinfo of last used router
368 */
325 struct batadv_orig_ifinfo *last_bonding_candidate; 369 struct batadv_orig_ifinfo *last_bonding_candidate;
370
326#ifdef CONFIG_BATMAN_ADV_DAT 371#ifdef CONFIG_BATMAN_ADV_DAT
372 /** @dat_addr: address of the orig node in the distributed hash */
327 batadv_dat_addr_t dat_addr; 373 batadv_dat_addr_t dat_addr;
328#endif 374#endif
375
376 /** @last_seen: time when last packet from this node was received */
329 unsigned long last_seen; 377 unsigned long last_seen;
378
379 /**
380 * @bcast_seqno_reset: time when the broadcast seqno window was reset
381 */
330 unsigned long bcast_seqno_reset; 382 unsigned long bcast_seqno_reset;
383
331#ifdef CONFIG_BATMAN_ADV_MCAST 384#ifdef CONFIG_BATMAN_ADV_MCAST
332 /* synchronizes mcast tvlv specific orig changes */ 385 /**
386 * @mcast_handler_lock: synchronizes mcast-capability and -flag changes
387 */
333 spinlock_t mcast_handler_lock; 388 spinlock_t mcast_handler_lock;
389
390 /** @mcast_flags: multicast flags announced by the orig node */
334 u8 mcast_flags; 391 u8 mcast_flags;
392
393 /**
394 * @mcast_want_all_unsnoopables_node: a list node for the
395 * mcast.want_all_unsnoopables list
396 */
335 struct hlist_node mcast_want_all_unsnoopables_node; 397 struct hlist_node mcast_want_all_unsnoopables_node;
398
399 /**
400 * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4
401 * list
402 */
336 struct hlist_node mcast_want_all_ipv4_node; 403 struct hlist_node mcast_want_all_ipv4_node;
404 /**
405 * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6
406 * list
407 */
337 struct hlist_node mcast_want_all_ipv6_node; 408 struct hlist_node mcast_want_all_ipv6_node;
338#endif 409#endif
410
411 /** @capabilities: announced capabilities of this originator */
339 unsigned long capabilities; 412 unsigned long capabilities;
413
414 /**
415 * @capa_initialized: bitfield to remember whether a capability was
416 * initialized
417 */
340 unsigned long capa_initialized; 418 unsigned long capa_initialized;
419
420 /** @last_ttvn: last seen translation table version number */
341 atomic_t last_ttvn; 421 atomic_t last_ttvn;
422
423 /** @tt_buff: last tt changeset this node received from the orig node */
342 unsigned char *tt_buff; 424 unsigned char *tt_buff;
425
426 /**
427 * @tt_buff_len: length of the last tt changeset this node received
428 * from the orig node
429 */
343 s16 tt_buff_len; 430 s16 tt_buff_len;
344 spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ 431
345 /* prevents changing the table while reading it */ 432 /** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */
433 spinlock_t tt_buff_lock;
434
435 /**
436 * @tt_lock: prevents updating the table while reading it. A table
437 * update is made up of two operations (data structure update and
438 * metadata -CRC/TTVN- recalculation) which have to be executed
439 * atomically so that no other thread can read the table/metadata
440 * in between.
441 */
346 spinlock_t tt_lock; 442 spinlock_t tt_lock;
443
444 /**
445 * @bcast_bits: bitfield containing the info which payload broadcast
446 * originated from this orig node this host already has seen (relative
447 * to last_bcast_seqno)
448 */
347 DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); 449 DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
450
451 /**
452 * @last_bcast_seqno: last broadcast sequence number received by this
453 * host
454 */
348 u32 last_bcast_seqno; 455 u32 last_bcast_seqno;
456
457 /**
458 * @neigh_list: list of potential next hop neighbor towards this orig
459 * node
460 */
349 struct hlist_head neigh_list; 461 struct hlist_head neigh_list;
350 /* neigh_list_lock protects: neigh_list, ifinfo_list, 462
351 * last_bonding_candidate and router 463 /**
464 * @neigh_list_lock: lock protecting neigh_list, ifinfo_list,
465 * last_bonding_candidate and router
352 */ 466 */
353 spinlock_t neigh_list_lock; 467 spinlock_t neigh_list_lock;
468
469 /** @hash_entry: hlist node for &batadv_priv.orig_hash */
354 struct hlist_node hash_entry; 470 struct hlist_node hash_entry;
471
472 /** @bat_priv: pointer to soft_iface this orig node belongs to */
355 struct batadv_priv *bat_priv; 473 struct batadv_priv *bat_priv;
356 /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ 474
475 /** @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno */
357 spinlock_t bcast_seqno_lock; 476 spinlock_t bcast_seqno_lock;
477
478 /** @refcount: number of contexts the object is used in */
358 struct kref refcount; 479 struct kref refcount;
480
481 /** @rcu: struct used for freeing in an RCU-safe manner */
359 struct rcu_head rcu; 482 struct rcu_head rcu;
483
360#ifdef CONFIG_BATMAN_ADV_NC 484#ifdef CONFIG_BATMAN_ADV_NC
485 /** @in_coding_list: list of nodes this orig can hear */
361 struct list_head in_coding_list; 486 struct list_head in_coding_list;
487
488 /** @out_coding_list: list of nodes that can hear this orig */
362 struct list_head out_coding_list; 489 struct list_head out_coding_list;
363 spinlock_t in_coding_list_lock; /* Protects in_coding_list */ 490
364 spinlock_t out_coding_list_lock; /* Protects out_coding_list */ 491 /** @in_coding_list_lock: protects in_coding_list */
492 spinlock_t in_coding_list_lock;
493
494 /** @out_coding_list_lock: protects out_coding_list */
495 spinlock_t out_coding_list_lock;
365#endif 496#endif
497
498 /** @fragments: array with heads for fragment chains */
366 struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT]; 499 struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT];
500
501 /**
502 * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by
503 * the originator represented by this object
504 */
367 struct hlist_head vlan_list; 505 struct hlist_head vlan_list;
368 spinlock_t vlan_list_lock; /* protects vlan_list */ 506
507 /** @vlan_list_lock: lock protecting vlan_list */
508 spinlock_t vlan_list_lock;
509
510 /** @bat_iv: B.A.T.M.A.N. IV private structure */
369 struct batadv_orig_bat_iv bat_iv; 511 struct batadv_orig_bat_iv bat_iv;
370}; 512};
371 513
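
@bcast_bits and @last_bcast_seqno implement a sliding duplicate-detection window of BATADV_TQ_LOCAL_WINDOW_SIZE sequence numbers, guarded by @bcast_seqno_lock. A simplified sketch of the lookup side (window shifting on newer seqnos is omitted; the complete handling is in the routing code):

#include <linux/bitops.h>
#include <linux/spinlock.h>

static bool orig_bcast_seen(struct batadv_orig_node *orig_node, u32 seqno)
{
        s32 diff;
        bool seen = false;

        spin_lock_bh(&orig_node->bcast_seqno_lock);
        diff = orig_node->last_bcast_seqno - seqno;
        if (diff >= 0 && diff < BATADV_TQ_LOCAL_WINDOW_SIZE)
                seen = test_bit(diff, orig_node->bcast_bits);
        spin_unlock_bh(&orig_node->bcast_seqno_lock);

        return seen;
}
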
372/** 514/**
373 * enum batadv_orig_capabilities - orig node capabilities 515 * enum batadv_orig_capabilities - orig node capabilities
374 * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled
375 * @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled
376 * @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability
377 * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability
378 * (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
379 */ 516 */
380enum batadv_orig_capabilities { 517enum batadv_orig_capabilities {
518 /**
519 * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table
520 * enabled
521 */
381 BATADV_ORIG_CAPA_HAS_DAT, 522 BATADV_ORIG_CAPA_HAS_DAT,
523
524 /** @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled */
382 BATADV_ORIG_CAPA_HAS_NC, 525 BATADV_ORIG_CAPA_HAS_NC,
526
527 /** @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability */
383 BATADV_ORIG_CAPA_HAS_TT, 528 BATADV_ORIG_CAPA_HAS_TT,
529
530 /**
531 * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability
532 * (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
533 */
384 BATADV_ORIG_CAPA_HAS_MCAST, 534 BATADV_ORIG_CAPA_HAS_MCAST,
385}; 535};
386 536
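
@capabilities and @capa_initialized in batadv_orig_node are unsigned long bitmaps indexed by this enum, so a capability only counts once the matching tvlv has actually been seen. A sketch of the intended test (helper name is illustrative):

#include <linux/bitops.h>

static bool orig_has_nc(struct batadv_orig_node *orig_node)
{
        return test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capa_initialized) &&
               test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capabilities);
}
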
387/** 537/**
388 * struct batadv_gw_node - structure for orig nodes announcing gw capabilities 538 * struct batadv_gw_node - structure for orig nodes announcing gw capabilities
389 * @list: list node for batadv_priv_gw::list
390 * @orig_node: pointer to corresponding orig node
391 * @bandwidth_down: advertised uplink download bandwidth
392 * @bandwidth_up: advertised uplink upload bandwidth
393 * @refcount: number of contexts the object is used in
394 * @rcu: struct used for freeing in an RCU-safe manner
395 */ 539 */
396struct batadv_gw_node { 540struct batadv_gw_node {
541 /** @list: list node for &batadv_priv_gw.list */
397 struct hlist_node list; 542 struct hlist_node list;
543
544 /** @orig_node: pointer to corresponding orig node */
398 struct batadv_orig_node *orig_node; 545 struct batadv_orig_node *orig_node;
546
547 /** @bandwidth_down: advertised uplink download bandwidth */
399 u32 bandwidth_down; 548 u32 bandwidth_down;
549
550 /** @bandwidth_up: advertised uplink upload bandwidth */
400 u32 bandwidth_up; 551 u32 bandwidth_up;
552
553 /** @refcount: number of contexts the object is used */
401 struct kref refcount; 554 struct kref refcount;
555
556 /** @rcu: struct used for freeing in an RCU-safe manner */
402 struct rcu_head rcu; 557 struct rcu_head rcu;
403}; 558};
404 559
@@ -407,118 +562,161 @@ DECLARE_EWMA(throughput, 10, 8)
407/** 562/**
408 * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor 563 * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor
409 * information 564 * information
410 * @throughput: ewma link throughput towards this neighbor
411 * @elp_interval: time interval between two ELP transmissions
412 * @elp_latest_seqno: latest and best known ELP sequence number
413 * @last_unicast_tx: when the last unicast packet has been sent to this neighbor
414 * @metric_work: work queue callback item for metric update
415 */ 565 */
416struct batadv_hardif_neigh_node_bat_v { 566struct batadv_hardif_neigh_node_bat_v {
567 /** @throughput: ewma link throughput towards this neighbor */
417 struct ewma_throughput throughput; 568 struct ewma_throughput throughput;
569
570 /** @elp_interval: time interval between two ELP transmissions */
418 u32 elp_interval; 571 u32 elp_interval;
572
573 /** @elp_latest_seqno: latest and best known ELP sequence number */
419 u32 elp_latest_seqno; 574 u32 elp_latest_seqno;
575
576 /**
577 * @last_unicast_tx: when the last unicast packet has been sent to this
578 * neighbor
579 */
420 unsigned long last_unicast_tx; 580 unsigned long last_unicast_tx;
581
582 /** @metric_work: work queue callback item for metric update */
421 struct work_struct metric_work; 583 struct work_struct metric_work;
422}; 584};
423 585
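
@throughput relies on the helpers generated by the DECLARE_EWMA(throughput, 10, 8) line visible in the hunk header above: ewma_throughput_init(), ewma_throughput_add() and ewma_throughput_read() from linux/average.h. A sketch of the sample/read cycle (the wrapper names are illustrative):

#include <linux/average.h>

static void neigh_v_record_sample(struct batadv_hardif_neigh_node_bat_v *bat_v,
                                  unsigned long tp_sample)
{
        ewma_throughput_add(&bat_v->throughput, tp_sample);
}

static unsigned long neigh_v_estimate(struct batadv_hardif_neigh_node_bat_v *bat_v)
{
        return ewma_throughput_read(&bat_v->throughput);
}
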
424/** 586/**
425 * struct batadv_hardif_neigh_node - unique neighbor per hard-interface 587 * struct batadv_hardif_neigh_node - unique neighbor per hard-interface
426 * @list: list node for batadv_hard_iface::neigh_list
427 * @addr: the MAC address of the neighboring interface
428 * @orig: the address of the originator this neighbor node belongs to
429 * @if_incoming: pointer to incoming hard-interface
430 * @last_seen: when last packet via this neighbor was received
431 * @bat_v: B.A.T.M.A.N. V private data
432 * @refcount: number of contexts the object is used in
433 * @rcu: struct used for freeing in a RCU-safe manner
434 */ 588 */
435struct batadv_hardif_neigh_node { 589struct batadv_hardif_neigh_node {
590 /** @list: list node for &batadv_hard_iface.neigh_list */
436 struct hlist_node list; 591 struct hlist_node list;
592
593 /** @addr: the MAC address of the neighboring interface */
437 u8 addr[ETH_ALEN]; 594 u8 addr[ETH_ALEN];
595
596 /**
597 * @orig: the address of the originator this neighbor node belongs to
598 */
438 u8 orig[ETH_ALEN]; 599 u8 orig[ETH_ALEN];
600
601 /** @if_incoming: pointer to incoming hard-interface */
439 struct batadv_hard_iface *if_incoming; 602 struct batadv_hard_iface *if_incoming;
603
604 /** @last_seen: when last packet via this neighbor was received */
440 unsigned long last_seen; 605 unsigned long last_seen;
606
441#ifdef CONFIG_BATMAN_ADV_BATMAN_V 607#ifdef CONFIG_BATMAN_ADV_BATMAN_V
608 /** @bat_v: B.A.T.M.A.N. V private data */
442 struct batadv_hardif_neigh_node_bat_v bat_v; 609 struct batadv_hardif_neigh_node_bat_v bat_v;
443#endif 610#endif
611
612 /** @refcount: number of contexts the object is used in */
444 struct kref refcount; 613 struct kref refcount;
614
615 /** @rcu: struct used for freeing in a RCU-safe manner */
445 struct rcu_head rcu; 616 struct rcu_head rcu;
446}; 617};
447 618
448/** 619/**
449 * struct batadv_neigh_node - structure for single hops neighbors 620 * struct batadv_neigh_node - structure for single hops neighbors
450 * @list: list node for batadv_orig_node::neigh_list
451 * @orig_node: pointer to corresponding orig_node
452 * @addr: the MAC address of the neighboring interface
453 * @ifinfo_list: list for routing metrics per outgoing interface
454 * @ifinfo_lock: lock protecting private ifinfo members and list
455 * @if_incoming: pointer to incoming hard-interface
456 * @last_seen: when last packet via this neighbor was received
457 * @hardif_neigh: hardif_neigh of this neighbor
458 * @refcount: number of contexts the object is used in
459 * @rcu: struct used for freeing in an RCU-safe manner
460 */ 621 */
461struct batadv_neigh_node { 622struct batadv_neigh_node {
623 /** @list: list node for &batadv_orig_node.neigh_list */
462 struct hlist_node list; 624 struct hlist_node list;
625
626 /** @orig_node: pointer to corresponding orig_node */
463 struct batadv_orig_node *orig_node; 627 struct batadv_orig_node *orig_node;
628
629 /** @addr: the MAC address of the neighboring interface */
464 u8 addr[ETH_ALEN]; 630 u8 addr[ETH_ALEN];
631
632 /** @ifinfo_list: list for routing metrics per outgoing interface */
465 struct hlist_head ifinfo_list; 633 struct hlist_head ifinfo_list;
466 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ 634
635 /** @ifinfo_lock: lock protecting ifinfo_list and its members */
636 spinlock_t ifinfo_lock;
637
638 /** @if_incoming: pointer to incoming hard-interface */
467 struct batadv_hard_iface *if_incoming; 639 struct batadv_hard_iface *if_incoming;
640
641 /** @last_seen: when last packet via this neighbor was received */
468 unsigned long last_seen; 642 unsigned long last_seen;
643
644 /** @hardif_neigh: hardif_neigh of this neighbor */
469 struct batadv_hardif_neigh_node *hardif_neigh; 645 struct batadv_hardif_neigh_node *hardif_neigh;
646
647 /** @refcount: number of contexts the object is used */
470 struct kref refcount; 648 struct kref refcount;
649
650 /** @rcu: struct used for freeing in an RCU-safe manner */
471 struct rcu_head rcu; 651 struct rcu_head rcu;
472}; 652};
473 653
474/** 654/**
475 * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing 655 * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing
476 * interface for B.A.T.M.A.N. IV 656 * interface for B.A.T.M.A.N. IV
477 * @tq_recv: ring buffer of received TQ values from this neigh node
478 * @tq_index: ring buffer index
479 * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
480 * @real_bits: bitfield containing the number of OGMs received from this neigh
481 * node (relative to orig_node->last_real_seqno)
482 * @real_packet_count: counted result of real_bits
483 */ 657 */
484struct batadv_neigh_ifinfo_bat_iv { 658struct batadv_neigh_ifinfo_bat_iv {
659 /** @tq_recv: ring buffer of received TQ values from this neigh node */
485 u8 tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; 660 u8 tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
661
662 /** @tq_index: ring buffer index */
486 u8 tq_index; 663 u8 tq_index;
664
665 /**
666 * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
667 */
487 u8 tq_avg; 668 u8 tq_avg;
669
670 /**
671 * @real_bits: bitfield containing the number of OGMs received from this
672 * neigh node (relative to orig_node->last_real_seqno)
673 */
488 DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); 674 DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
675
676 /** @real_packet_count: counted result of real_bits */
489 u8 real_packet_count; 677 u8 real_packet_count;
490}; 678};
491 679
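
@tq_recv, @tq_index and @tq_avg form a classic ring-buffer average over the last BATADV_TQ_GLOBAL_WINDOW_SIZE TQ samples. A plain-mean sketch against these members (the recalculation in bat_iv_ogm.c is equivalent in spirit but factored differently):

static u8 tq_ring_update(struct batadv_neigh_ifinfo_bat_iv *bat_iv, u8 sample)
{
        unsigned int sum = 0;
        int i;

        /* overwrite the oldest slot and advance the ring index */
        bat_iv->tq_recv[bat_iv->tq_index] = sample;
        bat_iv->tq_index = (bat_iv->tq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;

        for (i = 0; i < BATADV_TQ_GLOBAL_WINDOW_SIZE; i++)
                sum += bat_iv->tq_recv[i];

        bat_iv->tq_avg = sum / BATADV_TQ_GLOBAL_WINDOW_SIZE;
        return bat_iv->tq_avg;
}
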
492/** 680/**
493 * struct batadv_neigh_ifinfo_bat_v - neighbor information per outgoing 681 * struct batadv_neigh_ifinfo_bat_v - neighbor information per outgoing
494 * interface for B.A.T.M.A.N. V 682 * interface for B.A.T.M.A.N. V
495 * @throughput: last throughput metric received from originator via this neigh
496 * @last_seqno: last sequence number known for this neighbor
497 */ 683 */
498struct batadv_neigh_ifinfo_bat_v { 684struct batadv_neigh_ifinfo_bat_v {
685 /**
686 * @throughput: last throughput metric received from originator via this
687 * neigh
688 */
499 u32 throughput; 689 u32 throughput;
690
691 /** @last_seqno: last sequence number known for this neighbor */
500 u32 last_seqno; 692 u32 last_seqno;
501}; 693};
502 694
503/** 695/**
504 * struct batadv_neigh_ifinfo - neighbor information per outgoing interface 696 * struct batadv_neigh_ifinfo - neighbor information per outgoing interface
505 * @list: list node for batadv_neigh_node::ifinfo_list
506 * @if_outgoing: pointer to outgoing hard-interface
507 * @bat_iv: B.A.T.M.A.N. IV private structure
508 * @bat_v: B.A.T.M.A.N. V private data
509 * @last_ttl: last received ttl from this neigh node
510 * @refcount: number of contexts the object is used in
511 * @rcu: struct used for freeing in a RCU-safe manner
512 */ 697 */
513struct batadv_neigh_ifinfo { 698struct batadv_neigh_ifinfo {
699 /** @list: list node for &batadv_neigh_node.ifinfo_list */
514 struct hlist_node list; 700 struct hlist_node list;
701
702 /** @if_outgoing: pointer to outgoing hard-interface */
515 struct batadv_hard_iface *if_outgoing; 703 struct batadv_hard_iface *if_outgoing;
704
705 /** @bat_iv: B.A.T.M.A.N. IV private structure */
516 struct batadv_neigh_ifinfo_bat_iv bat_iv; 706 struct batadv_neigh_ifinfo_bat_iv bat_iv;
707
517#ifdef CONFIG_BATMAN_ADV_BATMAN_V 708#ifdef CONFIG_BATMAN_ADV_BATMAN_V
709 /** @bat_v: B.A.T.M.A.N. V private data */
518 struct batadv_neigh_ifinfo_bat_v bat_v; 710 struct batadv_neigh_ifinfo_bat_v bat_v;
519#endif 711#endif
712
713 /** @last_ttl: last received ttl from this neigh node */
520 u8 last_ttl; 714 u8 last_ttl;
715
716 /** @refcount: number of contexts the object is used */
521 struct kref refcount; 717 struct kref refcount;
718
719 /** @rcu: struct used for freeing in a RCU-safe manner */
522 struct rcu_head rcu; 720 struct rcu_head rcu;
523}; 721};
524 722
@@ -526,148 +724,278 @@ struct batadv_neigh_ifinfo {
526 724
527/** 725/**
528 * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression 726 * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression
529 * @orig: mac address of orig node originating the broadcast
530 * @crc: crc32 checksum of broadcast payload
531 * @entrytime: time when the broadcast packet was received
532 */ 727 */
533struct batadv_bcast_duplist_entry { 728struct batadv_bcast_duplist_entry {
729 /** @orig: mac address of orig node originating the broadcast */
534 u8 orig[ETH_ALEN]; 730 u8 orig[ETH_ALEN];
731
732 /** @crc: crc32 checksum of broadcast payload */
535 __be32 crc; 733 __be32 crc;
734
735 /** @entrytime: time when the broadcast packet was received */
536 unsigned long entrytime; 736 unsigned long entrytime;
537}; 737};
538#endif 738#endif
539 739
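
A duplist entry marks a broadcast as a duplicate when originator and payload CRC match a recent entry. A sketch of that comparison, assuming an expiry constant such as the BATADV_DUPLIST_TIMEOUT from main.h (the surrounding ring-buffer scan over bcast_duplist is omitted):

#include <linux/etherdevice.h>
#include <linux/jiffies.h>

static bool duplist_entry_matches(const struct batadv_bcast_duplist_entry *entry,
                                  const u8 *orig, __be32 payload_crc)
{
        /* stale entries never match */
        if (time_after(jiffies, entry->entrytime +
                                msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT)))
                return false;

        return entry->crc == payload_crc && ether_addr_equal(entry->orig, orig);
}
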
540/** 740/**
541 * enum batadv_counters - indices for traffic counters 741 * enum batadv_counters - indices for traffic counters
542 * @BATADV_CNT_TX: transmitted payload traffic packet counter
543 * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter
544 * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter
545 * @BATADV_CNT_RX: received payload traffic packet counter
546 * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter
547 * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter
548 * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter
549 * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter
550 * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter
551 * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
552 * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter
553 * @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter
554 * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter
555 * @BATADV_CNT_FRAG_RX: received fragment traffic packet counter
556 * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter
557 * @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter
558 * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter
559 * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
560 * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter
561 * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter
562 * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter
563 * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter
564 * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter
565 * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
566 * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter
567 * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter
568 * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter
569 * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet
570 * counter
571 * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
572 * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter
573 * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter
574 * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter
575 * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding
576 * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
577 * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter
578 * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet
579 * counter
580 * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc
581 * mode.
582 * @BATADV_CNT_NUM: number of traffic counters
583 */ 742 */
584enum batadv_counters { 743enum batadv_counters {
744 /** @BATADV_CNT_TX: transmitted payload traffic packet counter */
585 BATADV_CNT_TX, 745 BATADV_CNT_TX,
746
747 /** @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter */
586 BATADV_CNT_TX_BYTES, 748 BATADV_CNT_TX_BYTES,
749
750 /**
751 * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet
752 * counter
753 */
587 BATADV_CNT_TX_DROPPED, 754 BATADV_CNT_TX_DROPPED,
755
756 /** @BATADV_CNT_RX: received payload traffic packet counter */
588 BATADV_CNT_RX, 757 BATADV_CNT_RX,
758
759 /** @BATADV_CNT_RX_BYTES: received payload traffic bytes counter */
589 BATADV_CNT_RX_BYTES, 760 BATADV_CNT_RX_BYTES,
761
762 /** @BATADV_CNT_FORWARD: forwarded payload traffic packet counter */
590 BATADV_CNT_FORWARD, 763 BATADV_CNT_FORWARD,
764
765 /**
766 * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter
767 */
591 BATADV_CNT_FORWARD_BYTES, 768 BATADV_CNT_FORWARD_BYTES,
769
770 /**
771 * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet
772 * counter
773 */
592 BATADV_CNT_MGMT_TX, 774 BATADV_CNT_MGMT_TX,
775
776 /**
777 * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes
778 * counter
779 */
593 BATADV_CNT_MGMT_TX_BYTES, 780 BATADV_CNT_MGMT_TX_BYTES,
781
782 /**
783 * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
784 */
594 BATADV_CNT_MGMT_RX, 785 BATADV_CNT_MGMT_RX,
786
787 /**
788 * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes
789 * counter
790 */
595 BATADV_CNT_MGMT_RX_BYTES, 791 BATADV_CNT_MGMT_RX_BYTES,
792
793 /** @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter */
596 BATADV_CNT_FRAG_TX, 794 BATADV_CNT_FRAG_TX,
795
796 /**
797 * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter
798 */
597 BATADV_CNT_FRAG_TX_BYTES, 799 BATADV_CNT_FRAG_TX_BYTES,
800
801 /** @BATADV_CNT_FRAG_RX: received fragment traffic packet counter */
598 BATADV_CNT_FRAG_RX, 802 BATADV_CNT_FRAG_RX,
803
804 /**
805 * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter
806 */
599 BATADV_CNT_FRAG_RX_BYTES, 807 BATADV_CNT_FRAG_RX_BYTES,
808
809 /** @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter */
600 BATADV_CNT_FRAG_FWD, 810 BATADV_CNT_FRAG_FWD,
811
812 /**
813 * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter
814 */
601 BATADV_CNT_FRAG_FWD_BYTES, 815 BATADV_CNT_FRAG_FWD_BYTES,
816
817 /**
818 * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
819 */
602 BATADV_CNT_TT_REQUEST_TX, 820 BATADV_CNT_TT_REQUEST_TX,
821
822 /** @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter */
603 BATADV_CNT_TT_REQUEST_RX, 823 BATADV_CNT_TT_REQUEST_RX,
824
825 /**
826 * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet
827 * counter
828 */
604 BATADV_CNT_TT_RESPONSE_TX, 829 BATADV_CNT_TT_RESPONSE_TX,
830
831 /**
832 * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter
833 */
605 BATADV_CNT_TT_RESPONSE_RX, 834 BATADV_CNT_TT_RESPONSE_RX,
835
836 /**
837 * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet
838 * counter
839 */
606 BATADV_CNT_TT_ROAM_ADV_TX, 840 BATADV_CNT_TT_ROAM_ADV_TX,
841
842 /**
843 * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter
844 */
607 BATADV_CNT_TT_ROAM_ADV_RX, 845 BATADV_CNT_TT_ROAM_ADV_RX,
846
608#ifdef CONFIG_BATMAN_ADV_DAT 847#ifdef CONFIG_BATMAN_ADV_DAT
848 /**
849 * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
850 */
609 BATADV_CNT_DAT_GET_TX, 851 BATADV_CNT_DAT_GET_TX,
852
853 /** @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter */
610 BATADV_CNT_DAT_GET_RX, 854 BATADV_CNT_DAT_GET_RX,
855
856 /**
857 * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter
858 */
611 BATADV_CNT_DAT_PUT_TX, 859 BATADV_CNT_DAT_PUT_TX,
860
861 /** @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter */
612 BATADV_CNT_DAT_PUT_RX, 862 BATADV_CNT_DAT_PUT_RX,
863
864 /**
865 * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic
866 * packet counter
867 */
613 BATADV_CNT_DAT_CACHED_REPLY_TX, 868 BATADV_CNT_DAT_CACHED_REPLY_TX,
614#endif 869#endif
870
615#ifdef CONFIG_BATMAN_ADV_NC 871#ifdef CONFIG_BATMAN_ADV_NC
872 /**
873 * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
874 */
616 BATADV_CNT_NC_CODE, 875 BATADV_CNT_NC_CODE,
876
877 /**
878 * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes
879 * counter
880 */
617 BATADV_CNT_NC_CODE_BYTES, 881 BATADV_CNT_NC_CODE_BYTES,
882
883 /**
884 * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet
885 * counter
886 */
618 BATADV_CNT_NC_RECODE, 887 BATADV_CNT_NC_RECODE,
888
889 /**
890 * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes
891 * counter
892 */
619 BATADV_CNT_NC_RECODE_BYTES, 893 BATADV_CNT_NC_RECODE_BYTES,
894
895 /**
896 * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc
897 * decoding
898 */
620 BATADV_CNT_NC_BUFFER, 899 BATADV_CNT_NC_BUFFER,
900
901 /**
902 * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
903 */
621 BATADV_CNT_NC_DECODE, 904 BATADV_CNT_NC_DECODE,
905
906 /**
907 * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes
908 * counter
909 */
622 BATADV_CNT_NC_DECODE_BYTES, 910 BATADV_CNT_NC_DECODE_BYTES,
911
912 /**
913 * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic
914 * packet counter
915 */
623 BATADV_CNT_NC_DECODE_FAILED, 916 BATADV_CNT_NC_DECODE_FAILED,
917
918 /**
919 * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in
920 * promisc mode.
921 */
624 BATADV_CNT_NC_SNIFFED, 922 BATADV_CNT_NC_SNIFFED,
625#endif 923#endif
924
925 /** @BATADV_CNT_NUM: number of traffic counters */
626 BATADV_CNT_NUM, 926 BATADV_CNT_NUM,
627}; 927};
628 928
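
These indices address slots in a per-cpu counter array, so hot-path accounting never takes a lock. A sketch of bumping one slot, assuming bat_priv->bat_counters is the usual per-cpu u64 array (batman-adv wraps this in small inline helpers in main.h):

static void count_tx_packet(struct batadv_priv *bat_priv, size_t bytes)
{
        this_cpu_inc(bat_priv->bat_counters[BATADV_CNT_TX]);
        this_cpu_add(bat_priv->bat_counters[BATADV_CNT_TX_BYTES], bytes);
}
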
629/** 929/**
630 * struct batadv_priv_tt - per mesh interface translation table data 930 * struct batadv_priv_tt - per mesh interface translation table data
631 * @vn: translation table version number
632 * @ogm_append_cnt: counter for the number of OGMs containing the local tt diff
633 * @local_changes: changes registered in an originator interval
634 * @changes_list: tracks tt local changes within an originator interval
635 * @local_hash: local translation table hash table
636 * @global_hash: global translation table hash table
637 * @req_list: list of pending & unanswered tt_requests
638 * @roam_list: list of the last roaming events of each client limiting the
639 * number of roaming events to avoid route flapping
640 * @changes_list_lock: lock protecting changes_list
641 * @req_list_lock: lock protecting req_list
642 * @roam_list_lock: lock protecting roam_list
643 * @last_changeset: last tt changeset this host has generated
644 * @last_changeset_len: length of last tt changeset this host has generated
645 * @last_changeset_lock: lock protecting last_changeset & last_changeset_len
646 * @commit_lock: prevents executing a local TT commit while reading the
647 * local table. The local TT commit is made up of two operations (data
648 * structure update and metadata -CRC/TTVN- recalculation) which have to be
649 * executed atomically so that no other thread can read the
650 * table/metadata in between.
651 * @work: work queue callback item for translation table purging
652 */ 931 */
653struct batadv_priv_tt { 932struct batadv_priv_tt {
933 /** @vn: translation table version number */
654 atomic_t vn; 934 atomic_t vn;
935
936 /**
937 * @ogm_append_cnt: counter for the number of OGMs containing the local
938 * tt diff
939 */
655 atomic_t ogm_append_cnt; 940 atomic_t ogm_append_cnt;
941
942 /** @local_changes: changes registered in an originator interval */
656 atomic_t local_changes; 943 atomic_t local_changes;
944
945 /**
946 * @changes_list: tracks tt local changes within an originator interval
947 */
657 struct list_head changes_list; 948 struct list_head changes_list;
949
950 /** @local_hash: local translation table hash table */
658 struct batadv_hashtable *local_hash; 951 struct batadv_hashtable *local_hash;
952
953 /** @global_hash: global translation table hash table */
659 struct batadv_hashtable *global_hash; 954 struct batadv_hashtable *global_hash;
955
956 /** @req_list: list of pending & unanswered tt_requests */
660 struct hlist_head req_list; 957 struct hlist_head req_list;
958
959 /**
960 * @roam_list: list of the last roaming events of each client limiting
961 * the number of roaming events to avoid route flapping
962 */
661 struct list_head roam_list; 963 struct list_head roam_list;
662 spinlock_t changes_list_lock; /* protects changes */ 964
663 spinlock_t req_list_lock; /* protects req_list */ 965 /** @changes_list_lock: lock protecting changes_list */
664 spinlock_t roam_list_lock; /* protects roam_list */ 966 spinlock_t changes_list_lock;
967
968 /** @req_list_lock: lock protecting req_list */
969 spinlock_t req_list_lock;
970
971 /** @roam_list_lock: lock protecting roam_list */
972 spinlock_t roam_list_lock;
973
974 /** @last_changeset: last tt changeset this host has generated */
665 unsigned char *last_changeset; 975 unsigned char *last_changeset;
976
977 /**
978 * @last_changeset_len: length of last tt changeset this host has
979 * generated
980 */
666 s16 last_changeset_len; 981 s16 last_changeset_len;
667 /* protects last_changeset & last_changeset_len */ 982
983 /**
984 * @last_changeset_lock: lock protecting last_changeset &
985 * last_changeset_len
986 */
668 spinlock_t last_changeset_lock; 987 spinlock_t last_changeset_lock;
669 /* prevents executing a commit while reading the table */ 988
989 /**
990 * @commit_lock: prevents executing a local TT commit while reading
991 * the local table. The local TT commit is made up of two operations
992 * (data structure update and metadata -CRC/TTVN- recalculation) which
993 * have to be executed atomically so that no other thread can read the
994 * table/metadata in between.
995 */
670 spinlock_t commit_lock; 996 spinlock_t commit_lock;
997
998 /** @work: work queue callback item for translation table purging */
671 struct delayed_work work; 999 struct delayed_work work;
672}; 1000};
673 1001
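
The @commit_lock contract above means a consistent reader takes the same lock as the commit path, so the table and its CRC/TTVN are always observed as a pair. A sketch of the reader side (function name and body are illustrative, not the driver's):

#include <linux/spinlock.h>

static void tt_local_read_snapshot(struct batadv_priv_tt *tt)
{
        spin_lock_bh(&tt->commit_lock);
        /* ... walk tt->local_hash and read the matching CRC and tt->vn ... */
        spin_unlock_bh(&tt->commit_lock);
}
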
@@ -675,31 +1003,57 @@ struct batadv_priv_tt {
675 1003
676/** 1004/**
677 * struct batadv_priv_bla - per mesh interface bridge loop avoidance data 1005 * struct batadv_priv_bla - per mesh interface bridge loop avoidance data
678 * @num_requests: number of bla requests in flight
679 * @claim_hash: hash table containing mesh nodes this host has claimed
680 * @backbone_hash: hash table containing all detected backbone gateways
681 * @loopdetect_addr: MAC address used for own loopdetection frames
682 * @loopdetect_lasttime: time when the loopdetection frames were sent
683 * @loopdetect_next: how many periods to wait for the next loopdetect process
684 * @bcast_duplist: recently received broadcast packets array (for broadcast
685 * duplicate suppression)
686 * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist
687 * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr
688 * @claim_dest: local claim data (e.g. claim group)
689 * @work: work queue callback item for cleanups & bla announcements
690 */ 1006 */
691struct batadv_priv_bla { 1007struct batadv_priv_bla {
1008 /** @num_requests: number of bla requests in flight */
692 atomic_t num_requests; 1009 atomic_t num_requests;
1010
1011 /**
1012 * @claim_hash: hash table containing mesh nodes this host has claimed
1013 */
693 struct batadv_hashtable *claim_hash; 1014 struct batadv_hashtable *claim_hash;
1015
1016 /**
1017 * @backbone_hash: hash table containing all detected backbone gateways
1018 */
694 struct batadv_hashtable *backbone_hash; 1019 struct batadv_hashtable *backbone_hash;
1020
1021 /** @loopdetect_addr: MAC address used for own loopdetection frames */
695 u8 loopdetect_addr[ETH_ALEN]; 1022 u8 loopdetect_addr[ETH_ALEN];
1023
1024 /**
1025 * @loopdetect_lasttime: time when the loopdetection frames were sent
1026 */
696 unsigned long loopdetect_lasttime; 1027 unsigned long loopdetect_lasttime;
1028
1029 /**
1030 * @loopdetect_next: how many periods to wait for the next loopdetect
1031 * process
1032 */
697 atomic_t loopdetect_next; 1033 atomic_t loopdetect_next;
1034
1035 /**
1036 * @bcast_duplist: recently received broadcast packets array (for
1037 * broadcast duplicate suppression)
1038 */
698 struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; 1039 struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
1040
1041 /**
1042 * @bcast_duplist_curr: index of last broadcast packet added to
1043 * bcast_duplist
1044 */
699 int bcast_duplist_curr; 1045 int bcast_duplist_curr;
700 /* protects bcast_duplist & bcast_duplist_curr */ 1046
1047 /**
1048 * @bcast_duplist_lock: lock protecting bcast_duplist &
1049 * bcast_duplist_curr
1050 */
701 spinlock_t bcast_duplist_lock; 1051 spinlock_t bcast_duplist_lock;
1052
1053 /** @claim_dest: local claim data (e.g. claim group) */
702 struct batadv_bla_claim_dst claim_dest; 1054 struct batadv_bla_claim_dst claim_dest;
1055
1056 /** @work: work queue callback item for cleanups & bla announcements */
703 struct delayed_work work; 1057 struct delayed_work work;
704}; 1058};
705#endif 1059#endif
@@ -708,68 +1062,94 @@ struct batadv_priv_bla {
708 1062
709/** 1063/**
710 * struct batadv_priv_debug_log - debug logging data 1064 * struct batadv_priv_debug_log - debug logging data
711 * @log_buff: buffer holding the logs (ring buffer)
712 * @log_start: index of next character to read
713 * @log_end: index of next character to write
714 * @lock: lock protecting log_buff, log_start & log_end
715 * @queue_wait: log reader's wait queue
716 */ 1065 */
717struct batadv_priv_debug_log { 1066struct batadv_priv_debug_log {
1067 /** @log_buff: buffer holding the logs (ring buffer) */
718 char log_buff[BATADV_LOG_BUF_LEN]; 1068 char log_buff[BATADV_LOG_BUF_LEN];
1069
1070 /** @log_start: index of next character to read */
719 unsigned long log_start; 1071 unsigned long log_start;
1072
1073 /** @log_end: index of next character to write */
720 unsigned long log_end; 1074 unsigned long log_end;
721 spinlock_t lock; /* protects log_buff, log_start and log_end */ 1075
1076 /** @lock: lock protecting log_buff, log_start & log_end */
1077 spinlock_t lock;
1078
1079 /** @queue_wait: log reader's wait queue */
722 wait_queue_head_t queue_wait; 1080 wait_queue_head_t queue_wait;
723}; 1081};
724#endif 1082#endif
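
@log_start and @log_end are free-running indices into the ring: the writer appends at log_end and drags log_start forward on overflow, with @lock held. A sketch of a single-character write, assuming BATADV_LOG_BUF_LEN is a power of two (the real code masks the index the same way):

static void debug_log_emit_char(struct batadv_priv_debug_log *debug_log, char c)
{
        /* caller holds debug_log->lock */
        debug_log->log_buff[debug_log->log_end & (BATADV_LOG_BUF_LEN - 1)] = c;
        debug_log->log_end++;

        /* on overflow, push the read index along so it stays in window */
        if (debug_log->log_end - debug_log->log_start > BATADV_LOG_BUF_LEN)
                debug_log->log_start = debug_log->log_end - BATADV_LOG_BUF_LEN;
}
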
725 1083
726/** 1084/**
727 * struct batadv_priv_gw - per mesh interface gateway data 1085 * struct batadv_priv_gw - per mesh interface gateway data
728 * @gateway_list: list of available gateway nodes
729 * @list_lock: lock protecting gateway_list & curr_gw
730 * @curr_gw: pointer to currently selected gateway node
731 * @mode: gateway operation: off, client or server (see batadv_gw_modes)
732 * @sel_class: gateway selection class (applies if gw_mode client)
733 * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server)
734 * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
735 * @reselect: bool indicating a gateway re-selection is in progress
736 */ 1086 */
737struct batadv_priv_gw { 1087struct batadv_priv_gw {
1088 /** @gateway_list: list of available gateway nodes */
738 struct hlist_head gateway_list; 1089 struct hlist_head gateway_list;
739 spinlock_t list_lock; /* protects gateway_list & curr_gw */ 1090
740 struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ 1091 /** @list_lock: lock protecting gateway_list & curr_gw */
1092 spinlock_t list_lock;
1093
1094 /** @curr_gw: pointer to currently selected gateway node */
1095 struct batadv_gw_node __rcu *curr_gw;
1096
1097 /**
1098 * @mode: gateway operation: off, client or server (see batadv_gw_modes)
1099 */
741 atomic_t mode; 1100 atomic_t mode;
1101
1102 /** @sel_class: gateway selection class (applies if gw_mode client) */
742 atomic_t sel_class; 1103 atomic_t sel_class;
1104
1105 /**
1106 * @bandwidth_down: advertised uplink download bandwidth (if gw_mode
1107 * server)
1108 */
743 atomic_t bandwidth_down; 1109 atomic_t bandwidth_down;
1110
1111 /**
1112 * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
1113 */
744 atomic_t bandwidth_up; 1114 atomic_t bandwidth_up;
1115
1116 /** @reselect: bool indicating a gateway re-selection is in progress */
745 atomic_t reselect; 1117 atomic_t reselect;
746}; 1118};
747 1119
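
@curr_gw is annotated __rcu, so readers must dereference it inside an RCU section and pin it with a reference before leaving. A sketch of that access pattern (helper name is illustrative; gw_client.c contains the real one):

#include <linux/kref.h>
#include <linux/rcupdate.h>

static struct batadv_gw_node *gw_get_selected(struct batadv_priv_gw *gw)
{
        struct batadv_gw_node *curr_gw;

        rcu_read_lock();
        curr_gw = rcu_dereference(gw->curr_gw);
        if (curr_gw && !kref_get_unless_zero(&curr_gw->refcount))
                curr_gw = NULL; /* object is already being torn down */
        rcu_read_unlock();

        return curr_gw;
}
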
748/** 1120/**
749 * struct batadv_priv_tvlv - per mesh interface tvlv data 1121 * struct batadv_priv_tvlv - per mesh interface tvlv data
750 * @container_list: list of registered tvlv containers to be sent with each OGM
751 * @handler_list: list of the various tvlv content handlers
752 * @container_list_lock: protects tvlv container list access
753 * @handler_list_lock: protects handler list access
754 */ 1122 */
755struct batadv_priv_tvlv { 1123struct batadv_priv_tvlv {
1124 /**
1125 * @container_list: list of registered tvlv containers to be sent with
1126 * each OGM
1127 */
756 struct hlist_head container_list; 1128 struct hlist_head container_list;
1129
1130 /** @handler_list: list of the various tvlv content handlers */
757 struct hlist_head handler_list; 1131 struct hlist_head handler_list;
758 spinlock_t container_list_lock; /* protects container_list */ 1132
759 spinlock_t handler_list_lock; /* protects handler_list */ 1133 /** @container_list_lock: protects tvlv container list access */
1134 spinlock_t container_list_lock;
1135
1136 /** @handler_list_lock: protects handler list access */
1137 spinlock_t handler_list_lock;
760}; 1138};
761 1139
762#ifdef CONFIG_BATMAN_ADV_DAT 1140#ifdef CONFIG_BATMAN_ADV_DAT
763 1141
764/** 1142/**
765 * struct batadv_priv_dat - per mesh interface DAT private data 1143 * struct batadv_priv_dat - per mesh interface DAT private data
766 * @addr: node DAT address
767 * @hash: hashtable representing the local ARP cache
768 * @work: work queue callback item for cache purging
769 */ 1144 */
770struct batadv_priv_dat { 1145struct batadv_priv_dat {
1146 /** @addr: node DAT address */
771 batadv_dat_addr_t addr; 1147 batadv_dat_addr_t addr;
1148
1149 /** @hash: hashtable representing the local ARP cache */
772 struct batadv_hashtable *hash; 1150 struct batadv_hashtable *hash;
1151
1152 /** @work: work queue callback item for cache purging */
773 struct delayed_work work; 1153 struct delayed_work work;
774}; 1154};
775#endif 1155#endif
@@ -777,375 +1157,582 @@ struct batadv_priv_dat {
777#ifdef CONFIG_BATMAN_ADV_MCAST 1157#ifdef CONFIG_BATMAN_ADV_MCAST
778/** 1158/**
779 * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged 1159 * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged
780 * @exists: whether a querier exists in the mesh
781 * @shadowing: if a querier exists, whether it is potentially shadowing
782 * multicast listeners (i.e. querier is behind our own bridge segment)
783 */ 1160 */
784struct batadv_mcast_querier_state { 1161struct batadv_mcast_querier_state {
1162 /** @exists: whether a querier exists in the mesh */
785 bool exists; 1163 bool exists;
1164
1165 /**
1166 * @shadowing: if a querier exists, whether it is potentially shadowing
1167 * multicast listeners (i.e. querier is behind our own bridge segment)
1168 */
786 bool shadowing; 1169 bool shadowing;
787}; 1170};
788 1171
789/** 1172/**
790 * struct batadv_priv_mcast - per mesh interface mcast data 1173 * struct batadv_priv_mcast - per mesh interface mcast data
791 * @mla_list: list of multicast addresses we are currently announcing via TT
792 * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable
793 * multicast traffic
794 * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic
795 * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic
796 * @querier_ipv4: the current state of an IGMP querier in the mesh
797 * @querier_ipv6: the current state of an MLD querier in the mesh
798 * @flags: the flags we have last sent in our mcast tvlv
799 * @enabled: whether the multicast tvlv is currently enabled
800 * @bridged: whether the soft interface has a bridge on top
801 * @num_disabled: number of nodes that have no mcast tvlv
802 * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic
803 * @num_want_all_ipv4: counter for items in want_all_ipv4_list
804 * @num_want_all_ipv6: counter for items in want_all_ipv6_list
805 * @want_lists_lock: lock for protecting modifications to mcast want lists
806 * (traversals are rcu-locked)
807 * @work: work queue callback item for multicast TT and TVLV updates
808 */ 1174 */
809struct batadv_priv_mcast { 1175struct batadv_priv_mcast {
1176 /**
1177 * @mla_list: list of multicast addresses we are currently announcing
1178 * via TT
1179 */
810 struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */ 1180 struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */
1181
1182 /**
1183 * @want_all_unsnoopables_list: a list of orig_nodes wanting all
1184 * unsnoopable multicast traffic
1185 */
811 struct hlist_head want_all_unsnoopables_list; 1186 struct hlist_head want_all_unsnoopables_list;
1187
1188 /**
1189 * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast
1190 * traffic
1191 */
812 struct hlist_head want_all_ipv4_list; 1192 struct hlist_head want_all_ipv4_list;
1193
1194 /**
1195 * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast
1196 * traffic
1197 */
813 struct hlist_head want_all_ipv6_list; 1198 struct hlist_head want_all_ipv6_list;
1199
1200 /** @querier_ipv4: the current state of an IGMP querier in the mesh */
814 struct batadv_mcast_querier_state querier_ipv4; 1201 struct batadv_mcast_querier_state querier_ipv4;
1202
1203 /** @querier_ipv6: the current state of an MLD querier in the mesh */
815 struct batadv_mcast_querier_state querier_ipv6; 1204 struct batadv_mcast_querier_state querier_ipv6;
1205
1206 /** @flags: the flags we have last sent in our mcast tvlv */
816 u8 flags; 1207 u8 flags;
1208
1209 /** @enabled: whether the multicast tvlv is currently enabled */
817 bool enabled; 1210 bool enabled;
1211
1212 /** @bridged: whether the soft interface has a bridge on top */
818 bool bridged; 1213 bool bridged;
1214
1215 /** @num_disabled: number of nodes that have no mcast tvlv */
819 atomic_t num_disabled; 1216 atomic_t num_disabled;
1217
1218 /**
1219 * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP
1220 * traffic
1221 */
820 atomic_t num_want_all_unsnoopables; 1222 atomic_t num_want_all_unsnoopables;
1223
1224 /** @num_want_all_ipv4: counter for items in want_all_ipv4_list */
821 atomic_t num_want_all_ipv4; 1225 atomic_t num_want_all_ipv4;
1226
1227 /** @num_want_all_ipv6: counter for items in want_all_ipv6_list */
822 atomic_t num_want_all_ipv6; 1228 atomic_t num_want_all_ipv6;
823 /* protects want_all_{unsnoopables,ipv4,ipv6}_list */ 1229
1230 /**
1231 * @want_lists_lock: lock for protecting modifications to the mcast
1232 * want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
1233 */
824 spinlock_t want_lists_lock; 1234 spinlock_t want_lists_lock;
1235
1236 /** @work: work queue callback item for multicast TT and TVLV updates */
825 struct delayed_work work; 1237 struct delayed_work work;
826}; 1238};
827#endif 1239#endif
828 1240
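
The @want_lists_lock rule translates into the usual RCU list discipline: writers serialize on the spinlock while traversals only need rcu_read_lock(). A sketch of the writer side for one of the lists (helper name is illustrative):

#include <linux/rculist.h>
#include <linux/spinlock.h>

static void mcast_track_want_ipv4(struct batadv_priv_mcast *mcast,
                                  struct batadv_orig_node *orig_node)
{
        spin_lock_bh(&mcast->want_lists_lock);
        hlist_add_head_rcu(&orig_node->mcast_want_all_ipv4_node,
                           &mcast->want_all_ipv4_list);
        atomic_inc(&mcast->num_want_all_ipv4);
        spin_unlock_bh(&mcast->want_lists_lock);
}
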
829/** 1241/**
830 * struct batadv_priv_nc - per mesh interface network coding private data 1242 * struct batadv_priv_nc - per mesh interface network coding private data
831 * @work: work queue callback item for cleanup
832 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
833 * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
834 * @max_fwd_delay: maximum packet forward delay to allow coding of packets
835 * @max_buffer_time: buffer time for sniffed packets used for decoding
836 * @timestamp_fwd_flush: timestamp of last forward packet queue flush
837 * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge
838 * @coding_hash: Hash table used to buffer skbs while waiting for another
839 * incoming skb to code it with. Skbs are added to the buffer just before being
840 * forwarded in routing.c
841 * @decoding_hash: Hash table used to buffer skbs that might be needed to decode
842 * a received coded skb. The buffer is used for 1) skbs arriving on the
843 * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs
844 * forwarded by batman-adv.
845 */ 1243 */
846struct batadv_priv_nc { 1244struct batadv_priv_nc {
1245 /** @work: work queue callback item for cleanup */
847 struct delayed_work work; 1246 struct delayed_work work;
1247
1248 /**
1249 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
1250 */
848 struct dentry *debug_dir; 1251 struct dentry *debug_dir;
1252
1253 /**
1254 * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
1255 */
849 u8 min_tq; 1256 u8 min_tq;
1257
1258 /**
1259 * @max_fwd_delay: maximum packet forward delay to allow coding of
1260 * packets
1261 */
850 u32 max_fwd_delay; 1262 u32 max_fwd_delay;
1263
1264 /**
1265 * @max_buffer_time: buffer time for sniffed packets used for decoding
1266 */
851 u32 max_buffer_time; 1267 u32 max_buffer_time;
1268
1269 /**
1270 * @timestamp_fwd_flush: timestamp of last forward packet queue flush
1271 */
852 unsigned long timestamp_fwd_flush; 1272 unsigned long timestamp_fwd_flush;
1273
1274 /**
1275 * @timestamp_sniffed_purge: timestamp of last sniffed packet queue
1276 * purge
1277 */
853 unsigned long timestamp_sniffed_purge; 1278 unsigned long timestamp_sniffed_purge;
1279
1280 /**
1281 * @coding_hash: Hash table used to buffer skbs while waiting for
1282 * another incoming skb to code it with. Skbs are added to the buffer
1283 * just before being forwarded in routing.c
1284 */
854 struct batadv_hashtable *coding_hash; 1285 struct batadv_hashtable *coding_hash;
1286
1287 /**
1288 * @decoding_hash: Hash table used to buffer skbs that might be needed
1289 * to decode a received coded skb. The buffer is used for 1) skbs
1290 * arriving on the soft-interface; 2) skbs overheard on the
1291 * hard-interface; and 3) skbs forwarded by batman-adv.
1292 */
855 struct batadv_hashtable *decoding_hash; 1293 struct batadv_hashtable *decoding_hash;
856}; 1294};
857 1295
858/** 1296/**
859 * struct batadv_tp_unacked - unacked packet meta-information 1297 * struct batadv_tp_unacked - unacked packet meta-information
860 * @seqno: seqno of the unacked packet
861 * @len: length of the packet
862 * @list: list node for batadv_tp_vars::unacked_list
863 * 1298 *
864 * This struct is supposed to represent a buffered unacked packet. However, since 1299 * This struct is supposed to represent a buffered unacked packet. However, since
865 * the purpose of the TP meter is to count the traffic only, there is no need to 1300 * the purpose of the TP meter is to count the traffic only, there is no need to
866 * store the entire sk_buff; the starting offset and the length are enough 1301 * store the entire sk_buff; the starting offset and the length are enough
867 */ 1302 */
868struct batadv_tp_unacked { 1303struct batadv_tp_unacked {
1304 /** @seqno: seqno of the unacked packet */
869 u32 seqno; 1305 u32 seqno;
1306
1307 /** @len: length of the packet */
870 u16 len; 1308 u16 len;
1309
1310 /** @list: list node for &batadv_tp_vars.unacked_list */
871 struct list_head list; 1311 struct list_head list;
872}; 1312};
873 1313
874/** 1314/**
875 * enum batadv_tp_meter_role - Mode in tp meter session 1315 * enum batadv_tp_meter_role - Mode in tp meter session
876 * @BATADV_TP_RECEIVER: Initialized as receiver
877 * @BATADV_TP_SENDER: Initialized as sender
878 */ 1316 */
879enum batadv_tp_meter_role { 1317enum batadv_tp_meter_role {
1318 /** @BATADV_TP_RECEIVER: Initialized as receiver */
880 BATADV_TP_RECEIVER, 1319 BATADV_TP_RECEIVER,
1320
1321 /** @BATADV_TP_SENDER: Initialized as sender */
881 BATADV_TP_SENDER 1322 BATADV_TP_SENDER
882}; 1323};
883 1324
884/** 1325/**
885 * struct batadv_tp_vars - tp meter private variables per session 1326 * struct batadv_tp_vars - tp meter private variables per session
886 * @list: list node for bat_priv::tp_list
887 * @timer: timer for ack (receiver) and retry (sender)
888 * @bat_priv: pointer to the mesh object
889 * @start_time: start time in jiffies
890 * @other_end: mac address of remote
891 * @role: receiver/sender mode
892 * @sending: sending binary semaphore: 1 if sending, 0 if not
893 * @reason: reason for a stopped session
894 * @finish_work: work item for the finishing procedure
895 * @test_length: test length in milliseconds
896 * @session: TP session identifier
897 * @icmp_uid: local ICMP "socket" index
898 * @dec_cwnd: decimal part of the cwnd used during linear growth
899 * @cwnd: current size of the congestion window
900 * @cwnd_lock: lock to protect @cwnd & @dec_cwnd
901 * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
902 * connection switches to the Congestion Avoidance state
903 * @last_acked: last acked byte
904 * @last_sent: last sent byte, not yet acked
905 * @tot_sent: amount of data sent/ACKed so far
906 * @dup_acks: duplicate ACKs counter
907 * @fast_recovery: true if in Fast Recovery mode
908 * @recover: last sent seqno when entering Fast Recovery
909 * @rto: sender timeout
910 * @srtt: smoothed RTT scaled by 2^3
911 * @rttvar: RTT variation scaled by 2^2
912 * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout
913 * @prerandom_offset: offset inside the prerandom buffer
914 * @prerandom_lock: spinlock protecting access to prerandom_offset
915 * @last_recv: last in-order received packet
916 * @unacked_list: list of unacked packets (meta-info only)
917 * @unacked_lock: protects unacked_list
918 * @last_recv_time: last time (jiffies) a msg was received
919 * @refcount: number of contexts where the object is used
920 * @rcu: struct used for freeing in an RCU-safe manner
921 */ 1327 */
922struct batadv_tp_vars { 1328struct batadv_tp_vars {
1329 /** @list: list node for &bat_priv.tp_list */
923 struct hlist_node list; 1330 struct hlist_node list;
1331
1332 /** @timer: timer for ack (receiver) and retry (sender) */
924 struct timer_list timer; 1333 struct timer_list timer;
1334
1335 /** @bat_priv: pointer to the mesh object */
925 struct batadv_priv *bat_priv; 1336 struct batadv_priv *bat_priv;
1337
1338 /** @start_time: start time in jiffies */
926 unsigned long start_time; 1339 unsigned long start_time;
1340
1341 /** @other_end: mac address of remote */
927 u8 other_end[ETH_ALEN]; 1342 u8 other_end[ETH_ALEN];
1343
1344 /** @role: receiver/sender mode */
928 enum batadv_tp_meter_role role; 1345 enum batadv_tp_meter_role role;
1346
1347 /** @sending: sending binary semaphore: 1 if sending, 0 if not */
929 atomic_t sending; 1348 atomic_t sending;
1349
1350 /** @reason: reason for a stopped session */
930 enum batadv_tp_meter_reason reason; 1351 enum batadv_tp_meter_reason reason;
1352
1353 /** @finish_work: work item for the finishing procedure */
931 struct delayed_work finish_work; 1354 struct delayed_work finish_work;
1355
1356 /** @test_length: test length in milliseconds */
932 u32 test_length; 1357 u32 test_length;
1358
1359 /** @session: TP session identifier */
933 u8 session[2]; 1360 u8 session[2];
1361
1362 /** @icmp_uid: local ICMP "socket" index */
934 u8 icmp_uid; 1363 u8 icmp_uid;
935 1364
936 /* sender variables */ 1365 /* sender variables */
1366
1367 /** @dec_cwnd: decimal part of the cwnd used during linear growth */
937 u16 dec_cwnd; 1368 u16 dec_cwnd;
1369
1370 /** @cwnd: current size of the congestion window */
938 u32 cwnd; 1371 u32 cwnd;
939 spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */ 1372
1373 /** @cwnd_lock: lock to protect @cwnd & @dec_cwnd */
1374 spinlock_t cwnd_lock;
1375
1376 /**
1377 * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
1378 * connection switches to the Congestion Avoidance state
1379 */
940 u32 ss_threshold; 1380 u32 ss_threshold;
1381
1382 /** @last_acked: last acked byte */
941 atomic_t last_acked; 1383 atomic_t last_acked;
1384
1385 /** @last_sent: last sent byte, not yet acked */
942 u32 last_sent; 1386 u32 last_sent;
1387
1388 /** @tot_sent: amount of data sent/ACKed so far */
943 atomic64_t tot_sent; 1389 atomic64_t tot_sent;
1390
1391 /** @dup_acks: duplicate ACKs counter */
944 atomic_t dup_acks; 1392 atomic_t dup_acks;
1393
1394 /** @fast_recovery: true if in Fast Recovery mode */
945 bool fast_recovery; 1395 bool fast_recovery;
1396
1397 /** @recover: last sent seqno when entering Fast Recovery */
946 u32 recover; 1398 u32 recover;
1399
1400 /** @rto: sender timeout */
947 u32 rto; 1401 u32 rto;
1402
1403 /** @srtt: smoothed RTT scaled by 2^3 */
948 u32 srtt; 1404 u32 srtt;
1405
1406 /** @rttvar: RTT variation scaled by 2^2 */
949 u32 rttvar; 1407 u32 rttvar;
1408
1409 /**
1410 * @more_bytes: waiting queue anchor when waiting for more ack/retry
1411 * timeout
1412 */
950 wait_queue_head_t more_bytes; 1413 wait_queue_head_t more_bytes;
1414
1415 /** @prerandom_offset: offset inside the prerandom buffer */
951 u32 prerandom_offset; 1416 u32 prerandom_offset;
952 spinlock_t prerandom_lock; /* Protects prerandom_offset */ 1417
1418 /** @prerandom_lock: spinlock protecting access to prerandom_offset */
1419 spinlock_t prerandom_lock;
953 1420
954 /* receiver variables */ 1421 /* receiver variables */
1422
1423 /** @last_recv: last in-order received packet */
955 u32 last_recv; 1424 u32 last_recv;
1425
1426 /** @unacked_list: list of unacked packets (meta-info only) */
956 struct list_head unacked_list; 1427 struct list_head unacked_list;
957 spinlock_t unacked_lock; /* Protects unacked_list */ 1428
1429 /** @unacked_lock: protect unacked_list */
1430 spinlock_t unacked_lock;
1431
 1432 /** @last_recv_time: time (jiffies) the last msg was received */
958 unsigned long last_recv_time; 1433 unsigned long last_recv_time;
1434
 1435 /** @refcount: number of contexts where the object is used */
959 struct kref refcount; 1436 struct kref refcount;
1437
1438 /** @rcu: struct used for freeing in an RCU-safe manner */
960 struct rcu_head rcu; 1439 struct rcu_head rcu;
961}; 1440};
962 1441
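The sender fields above (@srtt scaled by 2^3, @rttvar scaled by 2^2, @rto) are sized for the classic integer-arithmetic RTT estimator of RFC 6298, the same one TCP uses: keeping srtt pre-multiplied by 8 and rttvar by 4 turns the 1/8 and 1/4 smoothing gains into shifts. A minimal sketch of that update, with hypothetical names rather than the actual tp_meter code:

#include <stdint.h>

/* Hypothetical estimator state; mirrors the scaling of the fields above. */
struct rtt_est {
	uint32_t srtt;   /* smoothed RTT, scaled by 2^3 */
	uint32_t rttvar; /* RTT variation, scaled by 2^2 */
	uint32_t rto;    /* sender timeout, same unit as the samples */
};

/* Feed one RTT sample "m" (e.g. in milliseconds) into the estimator. */
static void rtt_update(struct rtt_est *e, int32_t m)
{
	if (!e->srtt) {
		/* first sample: SRTT = m, RTTVAR = m/2, stored scaled */
		e->srtt = m << 3;
		e->rttvar = m << 1;
	} else {
		int32_t err = m - (e->srtt >> 3);

		/* SRTT += err/8, computed directly on the scaled value */
		e->srtt += err;
		if (err < 0)
			err = -err;
		/* RTTVAR += (|err| - RTTVAR)/4, likewise in scaled form */
		e->rttvar += err - (e->rttvar >> 2);
	}

	/* RTO = SRTT + 4*RTTVAR; rttvar already carries the factor of 4 */
	e->rto = (e->srtt >> 3) + e->rttvar;
}

Because both state variables stay pre-scaled, the whole update needs only shifts and adds, which is why TCP (and, by the look of these fields, the tp_meter) stores them this way.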
963/** 1442/**
964 * struct batadv_softif_vlan - per VLAN attributes set 1443 * struct batadv_softif_vlan - per VLAN attributes set
965 * @bat_priv: pointer to the mesh object
966 * @vid: VLAN identifier
967 * @kobj: kobject for sysfs vlan subdirectory
968 * @ap_isolation: AP isolation state
969 * @tt: TT private attributes (VLAN specific)
970 * @list: list node for bat_priv::softif_vlan_list
971 * @refcount: number of contexts where this object is currently in use
972 * @rcu: struct used for freeing in a RCU-safe manner
973 */ 1444 */
974struct batadv_softif_vlan { 1445struct batadv_softif_vlan {
1446 /** @bat_priv: pointer to the mesh object */
975 struct batadv_priv *bat_priv; 1447 struct batadv_priv *bat_priv;
1448
1449 /** @vid: VLAN identifier */
976 unsigned short vid; 1450 unsigned short vid;
1451
1452 /** @kobj: kobject for sysfs vlan subdirectory */
977 struct kobject *kobj; 1453 struct kobject *kobj;
1454
1455 /** @ap_isolation: AP isolation state */
978 atomic_t ap_isolation; /* boolean */ 1456 atomic_t ap_isolation; /* boolean */
1457
1458 /** @tt: TT private attributes (VLAN specific) */
979 struct batadv_vlan_tt tt; 1459 struct batadv_vlan_tt tt;
1460
1461 /** @list: list node for &bat_priv.softif_vlan_list */
980 struct hlist_node list; 1462 struct hlist_node list;
1463
1464 /**
 1465 * @refcount: number of contexts where this object is currently in use
1466 */
981 struct kref refcount; 1467 struct kref refcount;
1468
1469 /** @rcu: struct used for freeing in a RCU-safe manner */
982 struct rcu_head rcu; 1470 struct rcu_head rcu;
983}; 1471};
984 1472
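Nearly every object in this header ends with the same @refcount/@rcu pair. The pattern those two members support: the final kref_put() hands the object to an RCU-deferred free, so lockless readers that looked it up under rcu_read_lock() can finish before the memory is reused. A generic sketch of the release path; the demo_* names are illustrative, not batman-adv functions:

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	int payload;
	struct kref refcount;
	struct rcu_head rcu;
};

/* kref release callback: runs when the last reference is dropped */
static void demo_obj_release(struct kref *ref)
{
	struct demo_obj *obj = container_of(ref, struct demo_obj, refcount);

	/* defer the kfree() until every current RCU reader has finished */
	kfree_rcu(obj, rcu);
}

static void demo_obj_put(struct demo_obj *obj)
{
	kref_put(&obj->refcount, demo_obj_release);
}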
985/** 1473/**
986 * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data 1474 * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data
987 * @ogm_buff: buffer holding the OGM packet
988 * @ogm_buff_len: length of the OGM packet buffer
989 * @ogm_seqno: OGM sequence number - used to identify each OGM
990 * @ogm_wq: workqueue used to schedule OGM transmissions
991 */ 1475 */
992struct batadv_priv_bat_v { 1476struct batadv_priv_bat_v {
1477 /** @ogm_buff: buffer holding the OGM packet */
993 unsigned char *ogm_buff; 1478 unsigned char *ogm_buff;
1479
1480 /** @ogm_buff_len: length of the OGM packet buffer */
994 int ogm_buff_len; 1481 int ogm_buff_len;
1482
1483 /** @ogm_seqno: OGM sequence number - used to identify each OGM */
995 atomic_t ogm_seqno; 1484 atomic_t ogm_seqno;
1485
1486 /** @ogm_wq: workqueue used to schedule OGM transmissions */
996 struct delayed_work ogm_wq; 1487 struct delayed_work ogm_wq;
997}; 1488};
998 1489
999/** 1490/**
1000 * struct batadv_priv - per mesh interface data 1491 * struct batadv_priv - per mesh interface data
1001 * @mesh_state: current status of the mesh (inactive/active/deactivating)
1002 * @soft_iface: net device which holds this struct as private data
1003 * @bat_counters: mesh internal traffic statistic counters (see batadv_counters)
1004 * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
1005 * @bonding: bool indicating whether traffic bonding is enabled
1006 * @fragmentation: bool indicating whether traffic fragmentation is enabled
1007 * @packet_size_max: max packet size that can be transmitted via
1008 * multiple fragmented skbs or a single frame if fragmentation is disabled
1009 * @frag_seqno: incremental counter to identify chains of egress fragments
1010 * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
1011 * enabled
1012 * @distributed_arp_table: bool indicating whether distributed ARP table is
1013 * enabled
1014 * @multicast_mode: Enable or disable multicast optimizations on this node's
1015 * sender/originating side
1016 * @orig_interval: OGM broadcast interval in milliseconds
1017 * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
1018 * @log_level: configured log level (see batadv_dbg_level)
1019 * @isolation_mark: the skb->mark value used to match packets for AP isolation
1020 * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used
1021 * for the isolation mark
1022 * @bcast_seqno: last sent broadcast packet sequence number
1023 * @bcast_queue_left: number of remaining buffered broadcast packet slots
1024 * @batman_queue_left: number of remaining OGM packet slots
1025 * @num_ifaces: number of interfaces assigned to this mesh interface
1026 * @mesh_obj: kobject for sysfs mesh subdirectory
1027 * @debug_dir: dentry for debugfs batman-adv subdirectory
1028 * @forw_bat_list: list of aggregated OGMs that will be forwarded
1029 * @forw_bcast_list: list of broadcast packets that will be rebroadcasted
1030 * @tp_list: list of tp sessions
1031 * @tp_num: number of currently active tp sessions
1032 * @orig_hash: hash table containing mesh participants (orig nodes)
1033 * @forw_bat_list_lock: lock protecting forw_bat_list
1034 * @forw_bcast_list_lock: lock protecting forw_bcast_list
1035 * @tp_list_lock: spinlock protecting @tp_list
1036 * @orig_work: work queue callback item for orig node purging
1037 * @primary_if: one of the hard-interfaces assigned to this mesh interface
1038 * becomes the primary interface
1039 * @algo_ops: routing algorithm used by this mesh interface
1040 * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
1041 * of the mesh interface represented by this object
1042 * @softif_vlan_list_lock: lock protecting softif_vlan_list
1043 * @bla: bridge loop avoidance data
1044 * @debug_log: holding debug logging relevant data
1045 * @gw: gateway data
1046 * @tt: translation table data
1047 * @tvlv: type-version-length-value data
1048 * @dat: distributed arp table data
1049 * @mcast: multicast data
1050 * @network_coding: bool indicating whether network coding is enabled
1051 * @nc: network coding data
1052 * @bat_v: B.A.T.M.A.N. V per soft-interface private data
1053 */ 1492 */
1054struct batadv_priv { 1493struct batadv_priv {
1494 /**
1495 * @mesh_state: current status of the mesh
1496 * (inactive/active/deactivating)
1497 */
1055 atomic_t mesh_state; 1498 atomic_t mesh_state;
1499
1500 /** @soft_iface: net device which holds this struct as private data */
1056 struct net_device *soft_iface; 1501 struct net_device *soft_iface;
1502
1503 /**
1504 * @bat_counters: mesh internal traffic statistic counters (see
1505 * batadv_counters)
1506 */
1057 u64 __percpu *bat_counters; /* Per cpu counters */ 1507 u64 __percpu *bat_counters; /* Per cpu counters */
1508
1509 /**
1510 * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
1511 */
1058 atomic_t aggregated_ogms; 1512 atomic_t aggregated_ogms;
1513
1514 /** @bonding: bool indicating whether traffic bonding is enabled */
1059 atomic_t bonding; 1515 atomic_t bonding;
1516
1517 /**
1518 * @fragmentation: bool indicating whether traffic fragmentation is
1519 * enabled
1520 */
1060 atomic_t fragmentation; 1521 atomic_t fragmentation;
1522
1523 /**
1524 * @packet_size_max: max packet size that can be transmitted via
1525 * multiple fragmented skbs or a single frame if fragmentation is
1526 * disabled
1527 */
1061 atomic_t packet_size_max; 1528 atomic_t packet_size_max;
1529
1530 /**
1531 * @frag_seqno: incremental counter to identify chains of egress
1532 * fragments
1533 */
1062 atomic_t frag_seqno; 1534 atomic_t frag_seqno;
1535
1063#ifdef CONFIG_BATMAN_ADV_BLA 1536#ifdef CONFIG_BATMAN_ADV_BLA
1537 /**
1538 * @bridge_loop_avoidance: bool indicating whether bridge loop
1539 * avoidance is enabled
1540 */
1064 atomic_t bridge_loop_avoidance; 1541 atomic_t bridge_loop_avoidance;
1065#endif 1542#endif
1543
1066#ifdef CONFIG_BATMAN_ADV_DAT 1544#ifdef CONFIG_BATMAN_ADV_DAT
1545 /**
1546 * @distributed_arp_table: bool indicating whether distributed ARP table
1547 * is enabled
1548 */
1067 atomic_t distributed_arp_table; 1549 atomic_t distributed_arp_table;
1068#endif 1550#endif
1551
1069#ifdef CONFIG_BATMAN_ADV_MCAST 1552#ifdef CONFIG_BATMAN_ADV_MCAST
1553 /**
1554 * @multicast_mode: Enable or disable multicast optimizations on this
1555 * node's sender/originating side
1556 */
1070 atomic_t multicast_mode; 1557 atomic_t multicast_mode;
1071#endif 1558#endif
1559
1560 /** @orig_interval: OGM broadcast interval in milliseconds */
1072 atomic_t orig_interval; 1561 atomic_t orig_interval;
1562
1563 /**
1564 * @hop_penalty: penalty which will be applied to an OGM's tq-field on
1565 * every hop
1566 */
1073 atomic_t hop_penalty; 1567 atomic_t hop_penalty;
1568
1074#ifdef CONFIG_BATMAN_ADV_DEBUG 1569#ifdef CONFIG_BATMAN_ADV_DEBUG
1570 /** @log_level: configured log level (see batadv_dbg_level) */
1075 atomic_t log_level; 1571 atomic_t log_level;
1076#endif 1572#endif
1573
1574 /**
1575 * @isolation_mark: the skb->mark value used to match packets for AP
1576 * isolation
1577 */
1077 u32 isolation_mark; 1578 u32 isolation_mark;
1579
1580 /**
1581 * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be
1582 * used for the isolation mark
1583 */
1078 u32 isolation_mark_mask; 1584 u32 isolation_mark_mask;
1585
1586 /** @bcast_seqno: last sent broadcast packet sequence number */
1079 atomic_t bcast_seqno; 1587 atomic_t bcast_seqno;
1588
1589 /**
1590 * @bcast_queue_left: number of remaining buffered broadcast packet
1591 * slots
1592 */
1080 atomic_t bcast_queue_left; 1593 atomic_t bcast_queue_left;
1594
1595 /** @batman_queue_left: number of remaining OGM packet slots */
1081 atomic_t batman_queue_left; 1596 atomic_t batman_queue_left;
1597
1598 /** @num_ifaces: number of interfaces assigned to this mesh interface */
1082 char num_ifaces; 1599 char num_ifaces;
1600
1601 /** @mesh_obj: kobject for sysfs mesh subdirectory */
1083 struct kobject *mesh_obj; 1602 struct kobject *mesh_obj;
1603
1604 /** @debug_dir: dentry for debugfs batman-adv subdirectory */
1084 struct dentry *debug_dir; 1605 struct dentry *debug_dir;
1606
1607 /** @forw_bat_list: list of aggregated OGMs that will be forwarded */
1085 struct hlist_head forw_bat_list; 1608 struct hlist_head forw_bat_list;
1609
1610 /**
1611 * @forw_bcast_list: list of broadcast packets that will be
1612 * rebroadcasted
1613 */
1086 struct hlist_head forw_bcast_list; 1614 struct hlist_head forw_bcast_list;
1615
1616 /** @tp_list: list of tp sessions */
1087 struct hlist_head tp_list; 1617 struct hlist_head tp_list;
1618
 1619 /** @orig_hash: hash table containing mesh participants (orig nodes) */
1088 struct batadv_hashtable *orig_hash; 1620 struct batadv_hashtable *orig_hash;
1089 spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ 1621
1090 spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ 1622 /** @forw_bat_list_lock: lock protecting forw_bat_list */
1091 spinlock_t tp_list_lock; /* protects tp_list */ 1623 spinlock_t forw_bat_list_lock;
 1624
 1625 /** @forw_bcast_list_lock: lock protecting forw_bcast_list */
 1626 spinlock_t forw_bcast_list_lock;
 1627
 1628 /** @tp_list_lock: spinlock protecting @tp_list */
 1629 spinlock_t tp_list_lock;
 1630
 1631 /** @tp_num: number of currently active tp sessions */
1092 atomic_t tp_num; 1632 atomic_t tp_num;
1633
1634 /** @orig_work: work queue callback item for orig node purging */
1093 struct delayed_work orig_work; 1635 struct delayed_work orig_work;
1636
1637 /**
1638 * @primary_if: one of the hard-interfaces assigned to this mesh
1639 * interface becomes the primary interface
1640 */
1094 struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ 1641 struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
1642
1643 /** @algo_ops: routing algorithm used by this mesh interface */
1095 struct batadv_algo_ops *algo_ops; 1644 struct batadv_algo_ops *algo_ops;
1645
1646 /**
1647 * @softif_vlan_list: a list of softif_vlan structs, one per VLAN
1648 * created on top of the mesh interface represented by this object
1649 */
1096 struct hlist_head softif_vlan_list; 1650 struct hlist_head softif_vlan_list;
1097 spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */ 1651
1652 /** @softif_vlan_list_lock: lock protecting softif_vlan_list */
1653 spinlock_t softif_vlan_list_lock;
1654
1098#ifdef CONFIG_BATMAN_ADV_BLA 1655#ifdef CONFIG_BATMAN_ADV_BLA
 1656 /** @bla: bridge loop avoidance data */
1099 struct batadv_priv_bla bla; 1657 struct batadv_priv_bla bla;
1100#endif 1658#endif
1659
1101#ifdef CONFIG_BATMAN_ADV_DEBUG 1660#ifdef CONFIG_BATMAN_ADV_DEBUG
1661 /** @debug_log: holding debug logging relevant data */
1102 struct batadv_priv_debug_log *debug_log; 1662 struct batadv_priv_debug_log *debug_log;
1103#endif 1663#endif
1664
1665 /** @gw: gateway data */
1104 struct batadv_priv_gw gw; 1666 struct batadv_priv_gw gw;
1667
1668 /** @tt: translation table data */
1105 struct batadv_priv_tt tt; 1669 struct batadv_priv_tt tt;
1670
1671 /** @tvlv: type-version-length-value data */
1106 struct batadv_priv_tvlv tvlv; 1672 struct batadv_priv_tvlv tvlv;
1673
1107#ifdef CONFIG_BATMAN_ADV_DAT 1674#ifdef CONFIG_BATMAN_ADV_DAT
1675 /** @dat: distributed arp table data */
1108 struct batadv_priv_dat dat; 1676 struct batadv_priv_dat dat;
1109#endif 1677#endif
1678
1110#ifdef CONFIG_BATMAN_ADV_MCAST 1679#ifdef CONFIG_BATMAN_ADV_MCAST
1680 /** @mcast: multicast data */
1111 struct batadv_priv_mcast mcast; 1681 struct batadv_priv_mcast mcast;
1112#endif 1682#endif
1683
1113#ifdef CONFIG_BATMAN_ADV_NC 1684#ifdef CONFIG_BATMAN_ADV_NC
1685 /**
1686 * @network_coding: bool indicating whether network coding is enabled
1687 */
1114 atomic_t network_coding; 1688 atomic_t network_coding;
1689
1690 /** @nc: network coding data */
1115 struct batadv_priv_nc nc; 1691 struct batadv_priv_nc nc;
1116#endif /* CONFIG_BATMAN_ADV_NC */ 1692#endif /* CONFIG_BATMAN_ADV_NC */
1693
1117#ifdef CONFIG_BATMAN_ADV_BATMAN_V 1694#ifdef CONFIG_BATMAN_ADV_BATMAN_V
1695 /** @bat_v: B.A.T.M.A.N. V per soft-interface private data */
1118 struct batadv_priv_bat_v bat_v; 1696 struct batadv_priv_bat_v bat_v;
1119#endif 1697#endif
1120}; 1698};
1121 1699
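@bat_counters above is a per-CPU array: each CPU owns a private row of u64 slots, so the packet hot path can bump a statistic with neither locks nor atomics, and only the rare readout has to walk all CPUs. A sketch of the pattern; the demo_* names, slot count and init/readout helpers are illustrative:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/types.h>

#define DEMO_CNT_NUM 4	/* illustrative number of counter slots */

static u64 __percpu *demo_counters;

static int demo_counters_init(void)
{
	demo_counters = __alloc_percpu(DEMO_CNT_NUM * sizeof(u64),
				       __alignof__(u64));
	return demo_counters ? 0 : -ENOMEM;
}

/* hot path: lockless increment of this CPU's private slot */
static void demo_count(size_t idx, u64 count)
{
	this_cpu_add(demo_counters[idx], count);
}

/* readout: fold every CPU's slot into one total */
static u64 demo_counter_sum(size_t idx)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(demo_counters, cpu)[idx];

	return sum;
}

The __percpu annotation on the member is what lets sparse catch direct dereferences that bypass the per-CPU accessors.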
1122/** 1700/**
1123 * struct batadv_socket_client - layer2 icmp socket client data 1701 * struct batadv_socket_client - layer2 icmp socket client data
1124 * @queue_list: packet queue for packets destined for this socket client
1125 * @queue_len: number of packets in the packet queue (queue_list)
1126 * @index: socket client's index in the batadv_socket_client_hash
1127 * @lock: lock protecting queue_list, queue_len & index
1128 * @queue_wait: socket client's wait queue
1129 * @bat_priv: pointer to soft_iface this client belongs to
1130 */ 1702 */
1131struct batadv_socket_client { 1703struct batadv_socket_client {
1704 /**
1705 * @queue_list: packet queue for packets destined for this socket client
1706 */
1132 struct list_head queue_list; 1707 struct list_head queue_list;
1708
1709 /** @queue_len: number of packets in the packet queue (queue_list) */
1133 unsigned int queue_len; 1710 unsigned int queue_len;
1711
1712 /** @index: socket client's index in the batadv_socket_client_hash */
1134 unsigned char index; 1713 unsigned char index;
1135 spinlock_t lock; /* protects queue_list, queue_len & index */ 1714
1715 /** @lock: lock protecting queue_list, queue_len & index */
1716 spinlock_t lock;
1717
1718 /** @queue_wait: socket client's wait queue */
1136 wait_queue_head_t queue_wait; 1719 wait_queue_head_t queue_wait;
1720
1721 /** @bat_priv: pointer to soft_iface this client belongs to */
1137 struct batadv_priv *bat_priv; 1722 struct batadv_priv *bat_priv;
1138}; 1723};
1139 1724
1140/** 1725/**
1141 * struct batadv_socket_packet - layer2 icmp packet for socket client 1726 * struct batadv_socket_packet - layer2 icmp packet for socket client
1142 * @list: list node for batadv_socket_client::queue_list
1143 * @icmp_len: size of the layer2 icmp packet
1144 * @icmp_packet: layer2 icmp packet
1145 */ 1727 */
1146struct batadv_socket_packet { 1728struct batadv_socket_packet {
1729 /** @list: list node for &batadv_socket_client.queue_list */
1147 struct list_head list; 1730 struct list_head list;
1731
1732 /** @icmp_len: size of the layer2 icmp packet */
1148 size_t icmp_len; 1733 size_t icmp_len;
1734
1735 /** @icmp_packet: layer2 icmp packet */
1149 u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; 1736 u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
1150}; 1737};
1151 1738
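Taken together, the two structs above implement a one-reader packet queue: the receive path appends to @queue_list under @lock and wakes @queue_wait, while the reader sleeps until the list is non-empty. A condensed sketch with hypothetical demo_* names; it assumes a single reader per client, which is what makes the lockless wait condition safe:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct demo_client {
	struct list_head queue_list;
	unsigned int queue_len;
	spinlock_t lock;	/* protects queue_list and queue_len */
	wait_queue_head_t queue_wait;
};

struct demo_packet {
	struct list_head list;
};

/* producer: append under the lock, then wake any sleeping reader */
static void demo_enqueue(struct demo_client *c, struct demo_packet *p)
{
	spin_lock_bh(&c->lock);
	list_add_tail(&p->list, &c->queue_list);
	c->queue_len++;
	spin_unlock_bh(&c->lock);

	wake_up(&c->queue_wait);
}

/* consumer: sleep until a packet arrives, then pop the oldest one */
static struct demo_packet *demo_dequeue(struct demo_client *c)
{
	struct demo_packet *p;

	if (wait_event_interruptible(c->queue_wait,
				     !list_empty(&c->queue_list)))
		return NULL;	/* interrupted by a signal */

	spin_lock_bh(&c->lock);
	p = list_first_entry(&c->queue_list, struct demo_packet, list);
	list_del(&p->list);
	c->queue_len--;
	spin_unlock_bh(&c->lock);

	return p;
}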
@@ -1153,312 +1740,432 @@ struct batadv_socket_packet {
1153 1740
1154/** 1741/**
1155 * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN 1742 * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN
1156 * @orig: originator address of backbone node (mac address of primary iface)
1157 * @vid: vlan id this gateway was detected on
1158 * @hash_entry: hlist node for batadv_priv_bla::backbone_hash
1159 * @bat_priv: pointer to soft_iface this backbone gateway belongs to
1160 * @lasttime: last time we heard of this backbone gw
1161 * @wait_periods: grace time for bridge forward delays and bla group forming at
1162 * bootup phase - no bcast traffic is forwarded until it has elapsed
1163 * @request_sent: if this bool is set to true we are out of sync with this
1164 * backbone gateway - no bcast traffic is forwarded until the situation is
1165 * resolved
1166 * @crc: crc16 checksum over all claims
1167 * @crc_lock: lock protecting crc
1168 * @report_work: work struct for reporting detected loops
1169 * @refcount: number of contexts the object is used
1170 * @rcu: struct used for freeing in an RCU-safe manner
1171 */ 1743 */
1172struct batadv_bla_backbone_gw { 1744struct batadv_bla_backbone_gw {
1745 /**
1746 * @orig: originator address of backbone node (mac address of primary
1747 * iface)
1748 */
1173 u8 orig[ETH_ALEN]; 1749 u8 orig[ETH_ALEN];
1750
1751 /** @vid: vlan id this gateway was detected on */
1174 unsigned short vid; 1752 unsigned short vid;
1753
1754 /** @hash_entry: hlist node for &batadv_priv_bla.backbone_hash */
1175 struct hlist_node hash_entry; 1755 struct hlist_node hash_entry;
1756
1757 /** @bat_priv: pointer to soft_iface this backbone gateway belongs to */
1176 struct batadv_priv *bat_priv; 1758 struct batadv_priv *bat_priv;
1759
1760 /** @lasttime: last time we heard of this backbone gw */
1177 unsigned long lasttime; 1761 unsigned long lasttime;
1762
1763 /**
1764 * @wait_periods: grace time for bridge forward delays and bla group
 1765 * forming at bootup phase - no bcast traffic is forwarded until it has
1766 * elapsed
1767 */
1178 atomic_t wait_periods; 1768 atomic_t wait_periods;
1769
1770 /**
1771 * @request_sent: if this bool is set to true we are out of sync with
 1772 * this backbone gateway - no bcast traffic is forwarded until the
 1773 * situation is resolved
1774 */
1179 atomic_t request_sent; 1775 atomic_t request_sent;
1776
1777 /** @crc: crc16 checksum over all claims */
1180 u16 crc; 1778 u16 crc;
1181 spinlock_t crc_lock; /* protects crc */ 1779
1780 /** @crc_lock: lock protecting crc */
1781 spinlock_t crc_lock;
1782
1783 /** @report_work: work struct for reporting detected loops */
1182 struct work_struct report_work; 1784 struct work_struct report_work;
1785
1786 /** @refcount: number of contexts the object is used */
1183 struct kref refcount; 1787 struct kref refcount;
1788
1789 /** @rcu: struct used for freeing in an RCU-safe manner */
1184 struct rcu_head rcu; 1790 struct rcu_head rcu;
1185}; 1791};
1186 1792
1187/** 1793/**
1188 * struct batadv_bla_claim - claimed non-mesh client structure 1794 * struct batadv_bla_claim - claimed non-mesh client structure
1189 * @addr: mac address of claimed non-mesh client
1190 * @vid: vlan id this client was detected on
1191 * @backbone_gw: pointer to backbone gw claiming this client
1192 * @backbone_lock: lock protecting backbone_gw pointer
1193 * @lasttime: last time we heard of claim (locals only)
1194 * @hash_entry: hlist node for batadv_priv_bla::claim_hash
1195 * @refcount: number of contexts the object is used
1196 * @rcu: struct used for freeing in an RCU-safe manner
1197 */ 1795 */
1198struct batadv_bla_claim { 1796struct batadv_bla_claim {
1797 /** @addr: mac address of claimed non-mesh client */
1199 u8 addr[ETH_ALEN]; 1798 u8 addr[ETH_ALEN];
1799
1800 /** @vid: vlan id this client was detected on */
1200 unsigned short vid; 1801 unsigned short vid;
1802
1803 /** @backbone_gw: pointer to backbone gw claiming this client */
1201 struct batadv_bla_backbone_gw *backbone_gw; 1804 struct batadv_bla_backbone_gw *backbone_gw;
1202 spinlock_t backbone_lock; /* protects backbone_gw */ 1805
1806 /** @backbone_lock: lock protecting backbone_gw pointer */
1807 spinlock_t backbone_lock;
1808
1809 /** @lasttime: last time we heard of claim (locals only) */
1203 unsigned long lasttime; 1810 unsigned long lasttime;
1811
1812 /** @hash_entry: hlist node for &batadv_priv_bla.claim_hash */
1204 struct hlist_node hash_entry; 1813 struct hlist_node hash_entry;
1814
 1815 /** @rcu: struct used for freeing in an RCU-safe manner */
1205 struct rcu_head rcu; 1816 struct rcu_head rcu;
 1817
 1818 /** @refcount: number of contexts the object is used */
1206 struct kref refcount; 1819 struct kref refcount;
1207}; 1820};
1208#endif 1821#endif
1209 1822
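@crc in the backbone gateway above is a running crc16 over all of its claims, serialized by @crc_lock. One way to keep such a checksum incremental, and what these fields suggest bridge_loop_avoidance.c does, is to XOR each claim's own crc16 into the total: the identical operation then also removes a claim again, and the result is independent of claim order. A sketch with a hypothetical demo_* stand-in:

#include <linux/crc16.h>
#include <linux/etherdevice.h>
#include <linux/spinlock.h>

/* minimal stand-in for the two backbone gateway fields used here */
struct demo_backbone_gw {
	u16 crc;
	spinlock_t crc_lock;	/* protects crc */
};

/* XOR-fold one claim address into (or back out of) the checksum */
static void demo_crc_toggle_claim(struct demo_backbone_gw *gw,
				  const u8 *addr)
{
	u16 crc = crc16(0, addr, ETH_ALEN);

	spin_lock_bh(&gw->crc_lock);
	gw->crc ^= crc;
	spin_unlock_bh(&gw->crc_lock);
}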
1210/** 1823/**
1211 * struct batadv_tt_common_entry - tt local & tt global common data 1824 * struct batadv_tt_common_entry - tt local & tt global common data
1212 * @addr: mac address of non-mesh client
1213 * @vid: VLAN identifier
1214 * @hash_entry: hlist node for batadv_priv_tt::local_hash or for
1215 * batadv_priv_tt::global_hash
1216 * @flags: various state handling flags (see batadv_tt_client_flags)
1217 * @added_at: timestamp used for purging stale tt common entries
1218 * @refcount: number of contexts the object is used
1219 * @rcu: struct used for freeing in an RCU-safe manner
1220 */ 1825 */
1221struct batadv_tt_common_entry { 1826struct batadv_tt_common_entry {
1827 /** @addr: mac address of non-mesh client */
1222 u8 addr[ETH_ALEN]; 1828 u8 addr[ETH_ALEN];
1829
1830 /** @vid: VLAN identifier */
1223 unsigned short vid; 1831 unsigned short vid;
1832
1833 /**
1834 * @hash_entry: hlist node for &batadv_priv_tt.local_hash or for
1835 * &batadv_priv_tt.global_hash
1836 */
1224 struct hlist_node hash_entry; 1837 struct hlist_node hash_entry;
1838
1839 /** @flags: various state handling flags (see batadv_tt_client_flags) */
1225 u16 flags; 1840 u16 flags;
1841
1842 /** @added_at: timestamp used for purging stale tt common entries */
1226 unsigned long added_at; 1843 unsigned long added_at;
1844
1845 /** @refcount: number of contexts the object is used */
1227 struct kref refcount; 1846 struct kref refcount;
1847
1848 /** @rcu: struct used for freeing in an RCU-safe manner */
1228 struct rcu_head rcu; 1849 struct rcu_head rcu;
1229}; 1850};
1230 1851
1231/** 1852/**
1232 * struct batadv_tt_local_entry - translation table local entry data 1853 * struct batadv_tt_local_entry - translation table local entry data
1233 * @common: general translation table data
1234 * @last_seen: timestamp used for purging stale tt local entries
1235 * @vlan: soft-interface vlan of the entry
1236 */ 1854 */
1237struct batadv_tt_local_entry { 1855struct batadv_tt_local_entry {
1856 /** @common: general translation table data */
1238 struct batadv_tt_common_entry common; 1857 struct batadv_tt_common_entry common;
1858
1859 /** @last_seen: timestamp used for purging stale tt local entries */
1239 unsigned long last_seen; 1860 unsigned long last_seen;
1861
1862 /** @vlan: soft-interface vlan of the entry */
1240 struct batadv_softif_vlan *vlan; 1863 struct batadv_softif_vlan *vlan;
1241}; 1864};
1242 1865
1243/** 1866/**
1244 * struct batadv_tt_global_entry - translation table global entry data 1867 * struct batadv_tt_global_entry - translation table global entry data
1245 * @common: general translation table data
1246 * @orig_list: list of orig nodes announcing this non-mesh client
1247 * @orig_list_count: number of items in the orig_list
1248 * @list_lock: lock protecting orig_list
1249 * @roam_at: time at which TT_GLOBAL_ROAM was set
1250 */ 1868 */
1251struct batadv_tt_global_entry { 1869struct batadv_tt_global_entry {
1870 /** @common: general translation table data */
1252 struct batadv_tt_common_entry common; 1871 struct batadv_tt_common_entry common;
1872
1873 /** @orig_list: list of orig nodes announcing this non-mesh client */
1253 struct hlist_head orig_list; 1874 struct hlist_head orig_list;
1875
1876 /** @orig_list_count: number of items in the orig_list */
1254 atomic_t orig_list_count; 1877 atomic_t orig_list_count;
1255 spinlock_t list_lock; /* protects orig_list */ 1878
1879 /** @list_lock: lock protecting orig_list */
1880 spinlock_t list_lock;
1881
1882 /** @roam_at: time at which TT_GLOBAL_ROAM was set */
1256 unsigned long roam_at; 1883 unsigned long roam_at;
1257}; 1884};
1258 1885
1259/** 1886/**
1260 * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client 1887 * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client
1261 * @orig_node: pointer to orig node announcing this non-mesh client
1262 * @ttvn: translation table version number which added the non-mesh client
1263 * @flags: per orig entry TT sync flags
1264 * @list: list node for batadv_tt_global_entry::orig_list
1265 * @refcount: number of contexts the object is used
1266 * @rcu: struct used for freeing in an RCU-safe manner
1267 */ 1888 */
1268struct batadv_tt_orig_list_entry { 1889struct batadv_tt_orig_list_entry {
1890 /** @orig_node: pointer to orig node announcing this non-mesh client */
1269 struct batadv_orig_node *orig_node; 1891 struct batadv_orig_node *orig_node;
1892
1893 /**
1894 * @ttvn: translation table version number which added the non-mesh
1895 * client
1896 */
1270 u8 ttvn; 1897 u8 ttvn;
1898
1899 /** @flags: per orig entry TT sync flags */
1271 u8 flags; 1900 u8 flags;
1901
1902 /** @list: list node for &batadv_tt_global_entry.orig_list */
1272 struct hlist_node list; 1903 struct hlist_node list;
1904
1905 /** @refcount: number of contexts the object is used */
1273 struct kref refcount; 1906 struct kref refcount;
1907
1908 /** @rcu: struct used for freeing in an RCU-safe manner */
1274 struct rcu_head rcu; 1909 struct rcu_head rcu;
1275}; 1910};
1276 1911
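Entries like this are typically found by walking an RCU-protected hlist and then upgrading to a real reference: kref_get_unless_zero() fails once a concurrent release has already dropped the count to zero, so a reader can never resurrect a dying object. A generic lookup sketch; the demo_* names are hypothetical:

#include <linux/kref.h>
#include <linux/rculist.h>
#include <linux/types.h>

struct demo_entry {
	struct hlist_node list;
	u8 ttvn;
	struct kref refcount;
};

static struct demo_entry *demo_find(struct hlist_head *head, u8 ttvn)
{
	struct demo_entry *e, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(e, head, list) {
		if (e->ttvn != ttvn)
			continue;

		/* only keep the entry if it is not already being freed */
		if (!kref_get_unless_zero(&e->refcount))
			continue;

		found = e;
		break;
	}
	rcu_read_unlock();

	return found;
}

The caller later drops the reference with kref_put(), which feeds back into the RCU-deferred free shown earlier.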
1277/** 1912/**
1278 * struct batadv_tt_change_node - structure for tt changes that occurred 1913
1279 * @list: list node for batadv_priv_tt::changes_list
1280 * @change: holds the actual translation table diff data
1281 */ 1914 */
1282struct batadv_tt_change_node { 1915struct batadv_tt_change_node {
1916 /** @list: list node for &batadv_priv_tt.changes_list */
1283 struct list_head list; 1917 struct list_head list;
1918
1919 /** @change: holds the actual translation table diff data */
1284 struct batadv_tvlv_tt_change change; 1920 struct batadv_tvlv_tt_change change;
1285}; 1921};
1286 1922
1287/** 1923/**
1288 * struct batadv_tt_req_node - data to keep track of the tt requests in flight 1924 * struct batadv_tt_req_node - data to keep track of the tt requests in flight
1289 * @addr: mac address of the originator this request was sent to
1290 * @issued_at: timestamp used for purging stale tt requests
1291 * @refcount: number of contexts the object is used by
1292 * @list: list node for batadv_priv_tt::req_list
1293 */ 1925 */
1294struct batadv_tt_req_node { 1926struct batadv_tt_req_node {
1927 /**
 1928 * @addr: mac address of the originator this request was sent to
1929 */
1295 u8 addr[ETH_ALEN]; 1930 u8 addr[ETH_ALEN];
1931
1932 /** @issued_at: timestamp used for purging stale tt requests */
1296 unsigned long issued_at; 1933 unsigned long issued_at;
1934
1935 /** @refcount: number of contexts the object is used by */
1297 struct kref refcount; 1936 struct kref refcount;
1937
1938 /** @list: list node for &batadv_priv_tt.req_list */
1298 struct hlist_node list; 1939 struct hlist_node list;
1299}; 1940};
1300 1941
1301/** 1942/**
1302 * struct batadv_tt_roam_node - roaming client data 1943 * struct batadv_tt_roam_node - roaming client data
1303 * @addr: mac address of the client in the roaming phase
1304 * @counter: number of allowed roaming events per client within a single
1305 * OGM interval (changes are committed with each OGM)
1306 * @first_time: timestamp used for purging stale roaming node entries
1307 * @list: list node for batadv_priv_tt::roam_list
1308 */ 1944 */
1309struct batadv_tt_roam_node { 1945struct batadv_tt_roam_node {
1946 /** @addr: mac address of the client in the roaming phase */
1310 u8 addr[ETH_ALEN]; 1947 u8 addr[ETH_ALEN];
1948
1949 /**
1950 * @counter: number of allowed roaming events per client within a single
1951 * OGM interval (changes are committed with each OGM)
1952 */
1311 atomic_t counter; 1953 atomic_t counter;
1954
1955 /**
1956 * @first_time: timestamp used for purging stale roaming node entries
1957 */
1312 unsigned long first_time; 1958 unsigned long first_time;
1959
1960 /** @list: list node for &batadv_priv_tt.roam_list */
1313 struct list_head list; 1961 struct list_head list;
1314}; 1962};
1315 1963
1316/** 1964/**
1317 * struct batadv_nc_node - network coding node 1965 * struct batadv_nc_node - network coding node
1318 * @list: next and prev pointer for the list handling
1319 * @addr: the node's mac address
1320 * @refcount: number of contexts the object is used by
1321 * @rcu: struct used for freeing in an RCU-safe manner
1322 * @orig_node: pointer to corresponding orig node struct
1323 * @last_seen: timestamp of last ogm received from this node
1324 */ 1966 */
1325struct batadv_nc_node { 1967struct batadv_nc_node {
1968 /** @list: next and prev pointer for the list handling */
1326 struct list_head list; 1969 struct list_head list;
1970
1971 /** @addr: the node's mac address */
1327 u8 addr[ETH_ALEN]; 1972 u8 addr[ETH_ALEN];
1973
1974 /** @refcount: number of contexts the object is used by */
1328 struct kref refcount; 1975 struct kref refcount;
1976
1977 /** @rcu: struct used for freeing in an RCU-safe manner */
1329 struct rcu_head rcu; 1978 struct rcu_head rcu;
1979
1980 /** @orig_node: pointer to corresponding orig node struct */
1330 struct batadv_orig_node *orig_node; 1981 struct batadv_orig_node *orig_node;
1982
1983 /** @last_seen: timestamp of last ogm received from this node */
1331 unsigned long last_seen; 1984 unsigned long last_seen;
1332}; 1985};
1333 1986
1334/** 1987/**
1335 * struct batadv_nc_path - network coding path 1988 * struct batadv_nc_path - network coding path
1336 * @hash_entry: next and prev pointer for the list handling
1337 * @rcu: struct used for freeing in an RCU-safe manner
1338 * @refcount: number of contexts the object is used by
1339 * @packet_list: list of buffered packets for this path
1340 * @packet_list_lock: access lock for packet list
1341 * @next_hop: next hop (destination) of path
1342 * @prev_hop: previous hop (source) of path
1343 * @last_valid: timestamp for last validation of path
1344 */ 1989 */
1345struct batadv_nc_path { 1990struct batadv_nc_path {
1991 /** @hash_entry: next and prev pointer for the list handling */
1346 struct hlist_node hash_entry; 1992 struct hlist_node hash_entry;
1993
1994 /** @rcu: struct used for freeing in an RCU-safe manner */
1347 struct rcu_head rcu; 1995 struct rcu_head rcu;
1996
1997 /** @refcount: number of contexts the object is used by */
1348 struct kref refcount; 1998 struct kref refcount;
1999
2000 /** @packet_list: list of buffered packets for this path */
1349 struct list_head packet_list; 2001 struct list_head packet_list;
1350 spinlock_t packet_list_lock; /* Protects packet_list */ 2002
2003 /** @packet_list_lock: access lock for packet list */
2004 spinlock_t packet_list_lock;
2005
2006 /** @next_hop: next hop (destination) of path */
1351 u8 next_hop[ETH_ALEN]; 2007 u8 next_hop[ETH_ALEN];
2008
2009 /** @prev_hop: previous hop (source) of path */
1352 u8 prev_hop[ETH_ALEN]; 2010 u8 prev_hop[ETH_ALEN];
2011
2012 /** @last_valid: timestamp for last validation of path */
1353 unsigned long last_valid; 2013 unsigned long last_valid;
1354}; 2014};
1355 2015
1356/** 2016/**
1357 * struct batadv_nc_packet - network coding packet used when coding and 2017 * struct batadv_nc_packet - network coding packet used when coding and
1358 * decoding packets 2018 * decoding packets
1359 * @list: next and prev pointer for the list handling
1360 * @packet_id: crc32 checksum of skb data
1361 * @timestamp: field containing the info when the packet was added to path
1362 * @neigh_node: pointer to original next hop neighbor of skb
1363 * @skb: skb which can be encoded or used for decoding
1364 * @nc_path: pointer to path this nc packet is attached to
1365 */ 2019 */
1366struct batadv_nc_packet { 2020struct batadv_nc_packet {
2021 /** @list: next and prev pointer for the list handling */
1367 struct list_head list; 2022 struct list_head list;
2023
2024 /** @packet_id: crc32 checksum of skb data */
1368 __be32 packet_id; 2025 __be32 packet_id;
2026
2027 /**
2028 * @timestamp: field containing the info when the packet was added to
2029 * path
2030 */
1369 unsigned long timestamp; 2031 unsigned long timestamp;
2032
2033 /** @neigh_node: pointer to original next hop neighbor of skb */
1370 struct batadv_neigh_node *neigh_node; 2034 struct batadv_neigh_node *neigh_node;
2035
2036 /** @skb: skb which can be encoded or used for decoding */
1371 struct sk_buff *skb; 2037 struct sk_buff *skb;
2038
2039 /** @nc_path: pointer to path this nc packet is attached to */
1372 struct batadv_nc_path *nc_path; 2040 struct batadv_nc_path *nc_path;
1373}; 2041};
1374 2042
1375/** 2043/**
1376 * struct batadv_skb_cb - control buffer structure used to store private data 2044 * struct batadv_skb_cb - control buffer structure used to store private data
1377 * relevant to batman-adv in the skb->cb buffer in skbs. 2045 * relevant to batman-adv in the skb->cb buffer in skbs.
1378 * @decoded: Marks a skb as decoded, which is checked when searching for coding
1379 * opportunities in network-coding.c
1380 * @num_bcasts: Counter for broadcast packet retransmissions
1381 */ 2046 */
1382struct batadv_skb_cb { 2047struct batadv_skb_cb {
2048 /**
2049 * @decoded: Marks a skb as decoded, which is checked when searching for
2050 * coding opportunities in network-coding.c
2051 */
1383 bool decoded; 2052 bool decoded;
2053
2054 /** @num_bcasts: Counter for broadcast packet retransmissions */
1384 unsigned int num_bcasts; 2055 unsigned int num_bcasts;
1385}; 2056};
1386 2057
1387/** 2058/**
1388 * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded 2059 * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
1389 * @list: list node for batadv_priv::forw_{bat,bcast}_list
1390 * @cleanup_list: list node for purging functions
1391 * @send_time: execution time for delayed_work (packet sending)
1392 * @own: bool for locally generated packets (local OGMs are re-scheduled after
1393 * sending)
1394 * @skb: bcast packet's skb buffer
1395 * @packet_len: size of aggregated OGM packet inside the skb buffer
1396 * @direct_link_flags: direct link flags for aggregated OGM packets
1397 * @num_packets: counter for aggregated OGMv1 packets
1398 * @delayed_work: work queue callback item for packet sending
1399 * @if_incoming: pointer to incoming hard-iface or primary iface if
1400 * locally generated packet
1401 * @if_outgoing: interface where the packet should be sent to, or NULL if
1402 * unspecified
1403 * @queue_left: The queue (counter) this packet was applied to
1404 */ 2060 */
1405struct batadv_forw_packet { 2061struct batadv_forw_packet {
2062 /**
2063 * @list: list node for &batadv_priv.forw.bcast_list and
2064 * &batadv_priv.forw.bat_list
2065 */
1406 struct hlist_node list; 2066 struct hlist_node list;
2067
2068 /** @cleanup_list: list node for purging functions */
1407 struct hlist_node cleanup_list; 2069 struct hlist_node cleanup_list;
2070
2071 /** @send_time: execution time for delayed_work (packet sending) */
1408 unsigned long send_time; 2072 unsigned long send_time;
2073
2074 /**
2075 * @own: bool for locally generated packets (local OGMs are re-scheduled
2076 * after sending)
2077 */
1409 u8 own; 2078 u8 own;
2079
2080 /** @skb: bcast packet's skb buffer */
1410 struct sk_buff *skb; 2081 struct sk_buff *skb;
2082
2083 /** @packet_len: size of aggregated OGM packet inside the skb buffer */
1411 u16 packet_len; 2084 u16 packet_len;
2085
2086 /** @direct_link_flags: direct link flags for aggregated OGM packets */
1412 u32 direct_link_flags; 2087 u32 direct_link_flags;
2088
2089 /** @num_packets: counter for aggregated OGMv1 packets */
1413 u8 num_packets; 2090 u8 num_packets;
2091
2092 /** @delayed_work: work queue callback item for packet sending */
1414 struct delayed_work delayed_work; 2093 struct delayed_work delayed_work;
2094
2095 /**
2096 * @if_incoming: pointer to incoming hard-iface or primary iface if
2097 * locally generated packet
2098 */
1415 struct batadv_hard_iface *if_incoming; 2099 struct batadv_hard_iface *if_incoming;
2100
2101 /**
 2102 * @if_outgoing: interface where the packet should be sent to, or NULL if
2103 * unspecified
2104 */
1416 struct batadv_hard_iface *if_outgoing; 2105 struct batadv_hard_iface *if_outgoing;
2106
2107 /** @queue_left: The queue (counter) this packet was applied to */
1417 atomic_t *queue_left; 2108 atomic_t *queue_left;
1418}; 2109};
1419 2110
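@send_time and @delayed_work form the usual deferred-transmit pairing: the packet is queued with a relative delay and the work handler recovers its containing object with container_of(). A sketch with hypothetical demo_* names, the system workqueue standing in for batman-adv's own, and callers assumed to pass a send_time in the future:

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

struct demo_forw {
	unsigned long send_time;	/* absolute target, in jiffies */
	struct delayed_work delayed_work;
};

static void demo_send_outstanding(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct demo_forw *fp = container_of(dwork, struct demo_forw,
					    delayed_work);

	pr_debug("transmitting packet scheduled for %lu\n", fp->send_time);
	/* transmit here; a locally generated OGM would be re-armed
	 * instead of freed at this point */
}

static void demo_forw_queue(struct demo_forw *fp, unsigned long send_time)
{
	fp->send_time = send_time;
	INIT_DELAYED_WORK(&fp->delayed_work, demo_send_outstanding);
	/* queue_delayed_work() wants a relative delay */
	queue_delayed_work(system_wq, &fp->delayed_work,
			   send_time - jiffies);
}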
1420/** 2111/**
1421 * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) 2112 * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
1422 * @activate: start routing mechanisms when hard-interface is brought up
1423 * (optional)
1424 * @enable: init routing info when hard-interface is enabled
1425 * @disable: de-init routing info when hard-interface is disabled
1426 * @update_mac: (re-)init mac addresses of the protocol information
1427 * belonging to this hard-interface
1428 * @primary_set: called when primary interface is selected / changed
1429 */ 2113 */
1430struct batadv_algo_iface_ops { 2114struct batadv_algo_iface_ops {
2115 /**
2116 * @activate: start routing mechanisms when hard-interface is brought up
2117 * (optional)
2118 */
1431 void (*activate)(struct batadv_hard_iface *hard_iface); 2119 void (*activate)(struct batadv_hard_iface *hard_iface);
2120
2121 /** @enable: init routing info when hard-interface is enabled */
1432 int (*enable)(struct batadv_hard_iface *hard_iface); 2122 int (*enable)(struct batadv_hard_iface *hard_iface);
2123
2124 /** @disable: de-init routing info when hard-interface is disabled */
1433 void (*disable)(struct batadv_hard_iface *hard_iface); 2125 void (*disable)(struct batadv_hard_iface *hard_iface);
2126
2127 /**
2128 * @update_mac: (re-)init mac addresses of the protocol information
2129 * belonging to this hard-interface
2130 */
1434 void (*update_mac)(struct batadv_hard_iface *hard_iface); 2131 void (*update_mac)(struct batadv_hard_iface *hard_iface);
2132
2133 /** @primary_set: called when primary interface is selected / changed */
1435 void (*primary_set)(struct batadv_hard_iface *hard_iface); 2134 void (*primary_set)(struct batadv_hard_iface *hard_iface);
1436}; 2135};
1437 2136
1438/** 2137/**
1439 * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) 2138 * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
1440 * @hardif_init: called on creation of single hop entry
1441 * (optional)
1442 * @cmp: compare the metrics of two neighbors for their respective outgoing
1443 * interfaces
1444 * @is_similar_or_better: check if neigh1 is equally similar or better than
1445 * neigh2 for their respective outgoing interface from the metric perspective
1446 * @print: print the single hop neighbor list (optional)
1447 * @dump: dump neighbors to a netlink socket (optional)
1448 */ 2139 */
1449struct batadv_algo_neigh_ops { 2140struct batadv_algo_neigh_ops {
2141 /** @hardif_init: called on creation of single hop entry (optional) */
1450 void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); 2142 void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
2143
2144 /**
2145 * @cmp: compare the metrics of two neighbors for their respective
2146 * outgoing interfaces
2147 */
1451 int (*cmp)(struct batadv_neigh_node *neigh1, 2148 int (*cmp)(struct batadv_neigh_node *neigh1,
1452 struct batadv_hard_iface *if_outgoing1, 2149 struct batadv_hard_iface *if_outgoing1,
1453 struct batadv_neigh_node *neigh2, 2150 struct batadv_neigh_node *neigh2,
1454 struct batadv_hard_iface *if_outgoing2); 2151 struct batadv_hard_iface *if_outgoing2);
2152
2153 /**
2154 * @is_similar_or_better: check if neigh1 is equally similar or better
2155 * than neigh2 for their respective outgoing interface from the metric
2156 * prospective
2157 */
1455 bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1, 2158 bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1,
1456 struct batadv_hard_iface *if_outgoing1, 2159 struct batadv_hard_iface *if_outgoing1,
1457 struct batadv_neigh_node *neigh2, 2160 struct batadv_neigh_node *neigh2,
1458 struct batadv_hard_iface *if_outgoing2); 2161 struct batadv_hard_iface *if_outgoing2);
2162
1459#ifdef CONFIG_BATMAN_ADV_DEBUGFS 2163#ifdef CONFIG_BATMAN_ADV_DEBUGFS
2164 /** @print: print the single hop neighbor list (optional) */
1460 void (*print)(struct batadv_priv *priv, struct seq_file *seq); 2165 void (*print)(struct batadv_priv *priv, struct seq_file *seq);
1461#endif 2166#endif
2167
2168 /** @dump: dump neighbors to a netlink socket (optional) */
1462 void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, 2169 void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
1463 struct batadv_priv *priv, 2170 struct batadv_priv *priv,
1464 struct batadv_hard_iface *hard_iface); 2171 struct batadv_hard_iface *hard_iface);
@@ -1466,24 +2173,36 @@ struct batadv_algo_neigh_ops {
1466 2173
1467/** 2174/**
1468 * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) 2175 * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
1469 * @free: free the resources allocated by the routing algorithm for an orig_node
1470 * object (optional)
1471 * @add_if: ask the routing algorithm to apply the needed changes to the
1472 * orig_node due to a new hard-interface being added into the mesh (optional)
1473 * @del_if: ask the routing algorithm to apply the needed changes to the
1474 * orig_node due to a hard-interface being removed from the mesh (optional)
1475 * @print: print the originator table (optional)
1476 * @dump: dump originators to a netlink socket (optional)
1477 */ 2176 */
1478struct batadv_algo_orig_ops { 2177struct batadv_algo_orig_ops {
2178 /**
2179 * @free: free the resources allocated by the routing algorithm for an
2180 * orig_node object (optional)
2181 */
1479 void (*free)(struct batadv_orig_node *orig_node); 2182 void (*free)(struct batadv_orig_node *orig_node);
2183
2184 /**
2185 * @add_if: ask the routing algorithm to apply the needed changes to the
2186 * orig_node due to a new hard-interface being added into the mesh
2187 * (optional)
2188 */
1480 int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); 2189 int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
2190
2191 /**
2192 * @del_if: ask the routing algorithm to apply the needed changes to the
 2193 * orig_node due to a hard-interface being removed from the mesh
2194 * (optional)
2195 */
1481 int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, 2196 int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
1482 int del_if_num); 2197 int del_if_num);
2198
1483#ifdef CONFIG_BATMAN_ADV_DEBUGFS 2199#ifdef CONFIG_BATMAN_ADV_DEBUGFS
2200 /** @print: print the originator table (optional) */
1484 void (*print)(struct batadv_priv *priv, struct seq_file *seq, 2201 void (*print)(struct batadv_priv *priv, struct seq_file *seq,
1485 struct batadv_hard_iface *hard_iface); 2202 struct batadv_hard_iface *hard_iface);
1486#endif 2203#endif
2204
2205 /** @dump: dump originators to a netlink socket (optional) */
1487 void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, 2206 void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
1488 struct batadv_priv *priv, 2207 struct batadv_priv *priv,
1489 struct batadv_hard_iface *hard_iface); 2208 struct batadv_hard_iface *hard_iface);
@@ -1491,158 +2210,213 @@ struct batadv_algo_orig_ops {
1491 2210
1492/** 2211/**
1493 * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) 2212 * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
1494 * @init_sel_class: initialize GW selection class (optional)
1495 * @store_sel_class: parses and stores a new GW selection class (optional)
1496 * @show_sel_class: prints the current GW selection class (optional)
1497 * @get_best_gw_node: select the best GW from the list of available nodes
1498 * (optional)
1499 * @is_eligible: check if a newly discovered GW is a potential candidate for
1500 * the election as best GW (optional)
1501 * @print: print the gateway table (optional)
1502 * @dump: dump gateways to a netlink socket (optional)
1503 */ 2213 */
1504struct batadv_algo_gw_ops { 2214struct batadv_algo_gw_ops {
2215 /** @init_sel_class: initialize GW selection class (optional) */
1505 void (*init_sel_class)(struct batadv_priv *bat_priv); 2216 void (*init_sel_class)(struct batadv_priv *bat_priv);
2217
2218 /**
 2219 * @store_sel_class: parses and stores a new GW selection class
2220 * (optional)
2221 */
1506 ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, 2222 ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
1507 size_t count); 2223 size_t count);
2224
2225 /** @show_sel_class: prints the current GW selection class (optional) */
1508 ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); 2226 ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
2227
2228 /**
2229 * @get_best_gw_node: select the best GW from the list of available
2230 * nodes (optional)
2231 */
1509 struct batadv_gw_node *(*get_best_gw_node) 2232 struct batadv_gw_node *(*get_best_gw_node)
1510 (struct batadv_priv *bat_priv); 2233 (struct batadv_priv *bat_priv);
2234
2235 /**
2236 * @is_eligible: check if a newly discovered GW is a potential candidate
2237 * for the election as best GW (optional)
2238 */
1511 bool (*is_eligible)(struct batadv_priv *bat_priv, 2239 bool (*is_eligible)(struct batadv_priv *bat_priv,
1512 struct batadv_orig_node *curr_gw_orig, 2240 struct batadv_orig_node *curr_gw_orig,
1513 struct batadv_orig_node *orig_node); 2241 struct batadv_orig_node *orig_node);
2242
1514#ifdef CONFIG_BATMAN_ADV_DEBUGFS 2243#ifdef CONFIG_BATMAN_ADV_DEBUGFS
2244 /** @print: print the gateway table (optional) */
1515 void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); 2245 void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
1516#endif 2246#endif
2247
2248 /** @dump: dump gateways to a netlink socket (optional) */
1517 void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, 2249 void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
1518 struct batadv_priv *priv); 2250 struct batadv_priv *priv);
1519}; 2251};
1520 2252
1521/** 2253/**
1522 * struct batadv_algo_ops - mesh algorithm callbacks 2254 * struct batadv_algo_ops - mesh algorithm callbacks
1523 * @list: list node for the batadv_algo_list
1524 * @name: name of the algorithm
1525 * @iface: callbacks related to interface handling
1526 * @neigh: callbacks related to neighbors handling
1527 * @orig: callbacks related to originators handling
1528 * @gw: callbacks related to GW mode
1529 */ 2255 */
1530struct batadv_algo_ops { 2256struct batadv_algo_ops {
2257 /** @list: list node for the batadv_algo_list */
1531 struct hlist_node list; 2258 struct hlist_node list;
2259
2260 /** @name: name of the algorithm */
1532 char *name; 2261 char *name;
2262
2263 /** @iface: callbacks related to interface handling */
1533 struct batadv_algo_iface_ops iface; 2264 struct batadv_algo_iface_ops iface;
2265
2266 /** @neigh: callbacks related to neighbors handling */
1534 struct batadv_algo_neigh_ops neigh; 2267 struct batadv_algo_neigh_ops neigh;
2268
2269 /** @orig: callbacks related to originators handling */
1535 struct batadv_algo_orig_ops orig; 2270 struct batadv_algo_orig_ops orig;
2271
2272 /** @gw: callbacks related to GW mode */
1536 struct batadv_algo_gw_ops gw; 2273 struct batadv_algo_gw_ops gw;
1537}; 2274};
1538 2275
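These nested ops tables are how a routing algorithm plugs into the core: it fills in the callbacks it implements, leaves the optional ones NULL, and registers the struct once via batadv_algo_register(), the way bat_iv_ogm.c and bat_v.c do. A schematic example; the DEMO algorithm and every demo_* stub are hypothetical, and the batman-adv headers are assumed to be in scope:

#include "bat_algo.h"
#include "types.h"

static int demo_iface_enable(struct batadv_hard_iface *hard_iface)
{
	return 0;	/* allocate per-interface OGM state here */
}

static void demo_iface_disable(struct batadv_hard_iface *hard_iface)
{
}

static void demo_iface_update_mac(struct batadv_hard_iface *hard_iface)
{
}

static void demo_primary_set(struct batadv_hard_iface *hard_iface)
{
}

static int demo_neigh_cmp(struct batadv_neigh_node *neigh1,
			  struct batadv_hard_iface *if_outgoing1,
			  struct batadv_neigh_node *neigh2,
			  struct batadv_hard_iface *if_outgoing2)
{
	return 0;	/* equal metric */
}

static bool demo_neigh_is_sob(struct batadv_neigh_node *neigh1,
			      struct batadv_hard_iface *if_outgoing1,
			      struct batadv_neigh_node *neigh2,
			      struct batadv_hard_iface *if_outgoing2)
{
	return true;
}

static struct batadv_algo_ops demo_algo_ops = {
	.name = "DEMO",
	.iface = {
		.enable = demo_iface_enable,
		.disable = demo_iface_disable,
		.update_mac = demo_iface_update_mac,
		.primary_set = demo_primary_set,
	},
	.neigh = {
		.cmp = demo_neigh_cmp,
		.is_similar_or_better = demo_neigh_is_sob,
	},
	/* .orig and .gw hooks are optional and left unset here */
};

/* registered once from the algorithm's init code, e.g.:
 *	batadv_algo_register(&demo_algo_ops);
 */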
1539/** 2276/**
1540 * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It 2277 * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It
1541 * is used to store ARP entries needed for the global DAT cache 2278
1542 * @ip: the IPv4 corresponding to this DAT/ARP entry
1543 * @mac_addr: the MAC address associated to the stored IPv4
1544 * @vid: the vlan ID associated to this entry
1545 * @last_update: time in jiffies when this entry was refreshed last time
1546 * @hash_entry: hlist node for batadv_priv_dat::hash
1547 * @refcount: number of contexts the object is used
1548 * @rcu: struct used for freeing in an RCU-safe manner
1549 */ 2279 */
1550struct batadv_dat_entry { 2280struct batadv_dat_entry {
2281 /** @ip: the IPv4 corresponding to this DAT/ARP entry */
1551 __be32 ip; 2282 __be32 ip;
2283
2284 /** @mac_addr: the MAC address associated to the stored IPv4 */
1552 u8 mac_addr[ETH_ALEN]; 2285 u8 mac_addr[ETH_ALEN];
2286
2287 /** @vid: the vlan ID associated to this entry */
1553 unsigned short vid; 2288 unsigned short vid;
2289
2290 /**
2291 * @last_update: time in jiffies when this entry was refreshed last time
2292 */
1554 unsigned long last_update; 2293 unsigned long last_update;
2294
2295 /** @hash_entry: hlist node for &batadv_priv_dat.hash */
1555 struct hlist_node hash_entry; 2296 struct hlist_node hash_entry;
2297
2298 /** @refcount: number of contexts the object is used */
1556 struct kref refcount; 2299 struct kref refcount;
2300
2301 /** @rcu: struct used for freeing in an RCU-safe manner */
1557 struct rcu_head rcu; 2302 struct rcu_head rcu;
1558}; 2303};
1559 2304
1560/** 2305/**
1561 * struct batadv_hw_addr - a list entry for a MAC address 2306 * struct batadv_hw_addr - a list entry for a MAC address
1562 * @list: list node for the linking of entries
1563 * @addr: the MAC address of this list entry
1564 */ 2307 */
1565struct batadv_hw_addr { 2308struct batadv_hw_addr {
2309 /** @list: list node for the linking of entries */
1566 struct hlist_node list; 2310 struct hlist_node list;
2311
2312 /** @addr: the MAC address of this list entry */
1567 unsigned char addr[ETH_ALEN]; 2313 unsigned char addr[ETH_ALEN];
1568}; 2314};
1569 2315
1570/** 2316/**
1571 * struct batadv_dat_candidate - candidate destination for DAT operations 2317 * struct batadv_dat_candidate - candidate destination for DAT operations
1572 * @type: the type of the selected candidate. It can be one of the following:
1573 * - BATADV_DAT_CANDIDATE_NOT_FOUND
1574 * - BATADV_DAT_CANDIDATE_ORIG
1575 * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the
1576 * corresponding originator node structure
1577 */ 2318 */
1578struct batadv_dat_candidate { 2319struct batadv_dat_candidate {
2320 /**
 2321 * @type: the type of the selected candidate. It can be one of the
2322 * following:
2323 * - BATADV_DAT_CANDIDATE_NOT_FOUND
2324 * - BATADV_DAT_CANDIDATE_ORIG
2325 */
1579 int type; 2326 int type;
2327
2328 /**
2329 * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to
2330 * the corresponding originator node structure
2331 */
1580 struct batadv_orig_node *orig_node; 2332 struct batadv_orig_node *orig_node;
1581}; 2333};
1582 2334
1583/** 2335/**
1584 * struct batadv_tvlv_container - container for tvlv appended to OGMs 2336 * struct batadv_tvlv_container - container for tvlv appended to OGMs
1585 * @list: hlist node for batadv_priv_tvlv::container_list
1586 * @tvlv_hdr: tvlv header information needed to construct the tvlv
1587 * @refcount: number of contexts the object is used
1588 */ 2337 */
1589struct batadv_tvlv_container { 2338struct batadv_tvlv_container {
2339 /** @list: hlist node for &batadv_priv_tvlv.container_list */
1590 struct hlist_node list; 2340 struct hlist_node list;
2341
2342 /** @tvlv_hdr: tvlv header information needed to construct the tvlv */
1591 struct batadv_tvlv_hdr tvlv_hdr; 2343 struct batadv_tvlv_hdr tvlv_hdr;
2344
2345 /** @refcount: number of contexts the object is used */
1592 struct kref refcount; 2346 struct kref refcount;
1593}; 2347};
1594 2348
1595/** 2349/**
1596 * struct batadv_tvlv_handler - handler for specific tvlv type and version 2350 * struct batadv_tvlv_handler - handler for specific tvlv type and version
1597 * @list: hlist node for batadv_priv_tvlv::handler_list
1598 * @ogm_handler: handler callback which is given the tvlv payload to process on
1599 * incoming OGM packets
1600 * @unicast_handler: handler callback which is given the tvlv payload to process
1601 * on incoming unicast tvlv packets
1602 * @type: tvlv type this handler feels responsible for
1603 * @version: tvlv version this handler feels responsible for
1604 * @flags: tvlv handler flags
1605 * @refcount: number of contexts the object is used
1606 * @rcu: struct used for freeing in an RCU-safe manner
1607 */ 2351 */
1608struct batadv_tvlv_handler { 2352struct batadv_tvlv_handler {
2353 /** @list: hlist node for &batadv_priv_tvlv.handler_list */
1609 struct hlist_node list; 2354 struct hlist_node list;
2355
2356 /**
2357 * @ogm_handler: handler callback which is given the tvlv payload to
2358 * process on incoming OGM packets
2359 */
1610 void (*ogm_handler)(struct batadv_priv *bat_priv, 2360 void (*ogm_handler)(struct batadv_priv *bat_priv,
1611 struct batadv_orig_node *orig, 2361 struct batadv_orig_node *orig,
1612 u8 flags, void *tvlv_value, u16 tvlv_value_len); 2362 u8 flags, void *tvlv_value, u16 tvlv_value_len);
2363
2364 /**
2365 * @unicast_handler: handler callback which is given the tvlv payload to
2366 * process on incoming unicast tvlv packets
2367 */
1613 int (*unicast_handler)(struct batadv_priv *bat_priv, 2368 int (*unicast_handler)(struct batadv_priv *bat_priv,
1614 u8 *src, u8 *dst, 2369 u8 *src, u8 *dst,
1615 void *tvlv_value, u16 tvlv_value_len); 2370 void *tvlv_value, u16 tvlv_value_len);
2371
2372 /** @type: tvlv type this handler feels responsible for */
1616 u8 type; 2373 u8 type;
2374
2375 /** @version: tvlv version this handler feels responsible for */
1617 u8 version; 2376 u8 version;
2377
2378 /** @flags: tvlv handler flags */
1618 u8 flags; 2379 u8 flags;
2380
 2381	/** @refcount: number of contexts the object is used in */
1619 struct kref refcount; 2382 struct kref refcount;
2383
2384 /** @rcu: struct used for freeing in an RCU-safe manner */
1620 struct rcu_head rcu; 2385 struct rcu_head rcu;
1621}; 2386};
1622 2387
1623/** 2388/**
1624 * enum batadv_tvlv_handler_flags - tvlv handler flags definitions 2389 * enum batadv_tvlv_handler_flags - tvlv handler flags definitions
1625 * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function will call
1626 * this handler even if its type was not found (with no data)
1627 * @BATADV_TVLV_HANDLER_OGM_CALLED: internal tvlv handling flag - the API marks
1628 * a handler as being called, so it won't be called if the
1629 * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set
1630 */ 2390 */
1631enum batadv_tvlv_handler_flags { 2391enum batadv_tvlv_handler_flags {
2392 /**
2393 * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function
2394 * will call this handler even if its type was not found (with no data)
2395 */
1632 BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1), 2396 BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1),
2397
2398 /**
 2399	 * @BATADV_TVLV_HANDLER_OGM_CALLED: internal tvlv handling flag - the
2400 * API marks a handler as being called, so it won't be called if the
2401 * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set
2402 */
1633 BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), 2403 BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
1634}; 2404};
1635 2405
1636/** 2406/**
1637 * struct batadv_store_mesh_work - Work queue item to detach add/del interface 2407 * struct batadv_store_mesh_work - Work queue item to detach add/del interface
1638 * from sysfs locks 2408 * from sysfs locks
1639 * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
1640 * @soft_iface_name: name of soft-interface to modify
1641 * @work: work queue item
1642 */ 2409 */
1643struct batadv_store_mesh_work { 2410struct batadv_store_mesh_work {
2411 /**
2412 * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
2413 */
1644 struct net_device *net_dev; 2414 struct net_device *net_dev;
2415
2416 /** @soft_iface_name: name of soft-interface to modify */
1645 char soft_iface_name[IFNAMSIZ]; 2417 char soft_iface_name[IFNAMSIZ];
2418
2419 /** @work: work queue item */
1646 struct work_struct work; 2420 struct work_struct work;
1647}; 2421};
1648 2422
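
The batman-adv hunks above convert kernel-doc comments from a single list of @member tags above each struct to inline /** @member: ... */ comments next to the fields; scripts/kernel-doc parses both forms the same way. A minimal sketch of the resulting style, using an illustrative struct that is not part of the patch:

    #include <linux/kref.h>

    /**
     * struct example_entry - illustrative struct with inline member docs
     */
    struct example_entry {
            /** @id: numeric identifier of the entry */
            int id;

            /** @refcount: number of contexts the object is used in */
            struct kref refcount;
    };
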
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 91e3ba280706..84d92a077834 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -421,7 +421,7 @@ out:
421} 421}
422EXPORT_SYMBOL(bt_sock_stream_recvmsg); 422EXPORT_SYMBOL(bt_sock_stream_recvmsg);
423 423
424static inline unsigned int bt_accept_poll(struct sock *parent) 424static inline __poll_t bt_accept_poll(struct sock *parent)
425{ 425{
426 struct bt_sock *s, *n; 426 struct bt_sock *s, *n;
427 struct sock *sk; 427 struct sock *sk;
@@ -431,17 +431,17 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
431 if (sk->sk_state == BT_CONNECTED || 431 if (sk->sk_state == BT_CONNECTED ||
432 (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && 432 (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
433 sk->sk_state == BT_CONNECT2)) 433 sk->sk_state == BT_CONNECT2))
434 return POLLIN | POLLRDNORM; 434 return EPOLLIN | EPOLLRDNORM;
435 } 435 }
436 436
437 return 0; 437 return 0;
438} 438}
439 439
440unsigned int bt_sock_poll(struct file *file, struct socket *sock, 440__poll_t bt_sock_poll(struct file *file, struct socket *sock,
441 poll_table *wait) 441 poll_table *wait)
442{ 442{
443 struct sock *sk = sock->sk; 443 struct sock *sk = sock->sk;
444 unsigned int mask = 0; 444 __poll_t mask = 0;
445 445
446 BT_DBG("sock %p, sk %p", sock, sk); 446 BT_DBG("sock %p, sk %p", sock, sk);
447 447
@@ -451,20 +451,20 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
451 return bt_accept_poll(sk); 451 return bt_accept_poll(sk);
452 452
453 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 453 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
454 mask |= POLLERR | 454 mask |= EPOLLERR |
455 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); 455 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
456 456
457 if (sk->sk_shutdown & RCV_SHUTDOWN) 457 if (sk->sk_shutdown & RCV_SHUTDOWN)
458 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 458 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
459 459
460 if (sk->sk_shutdown == SHUTDOWN_MASK) 460 if (sk->sk_shutdown == SHUTDOWN_MASK)
461 mask |= POLLHUP; 461 mask |= EPOLLHUP;
462 462
463 if (!skb_queue_empty(&sk->sk_receive_queue)) 463 if (!skb_queue_empty(&sk->sk_receive_queue))
464 mask |= POLLIN | POLLRDNORM; 464 mask |= EPOLLIN | EPOLLRDNORM;
465 465
466 if (sk->sk_state == BT_CLOSED) 466 if (sk->sk_state == BT_CLOSED)
467 mask |= POLLHUP; 467 mask |= EPOLLHUP;
468 468
469 if (sk->sk_state == BT_CONNECT || 469 if (sk->sk_state == BT_CONNECT ||
470 sk->sk_state == BT_CONNECT2 || 470 sk->sk_state == BT_CONNECT2 ||
@@ -472,7 +472,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
472 return mask; 472 return mask;
473 473
474 if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) 474 if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
475 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 475 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
476 else 476 else
477 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 477 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
478 478
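
These poll hunks are part of the tree-wide conversion from plain unsigned int masks with POLL* flags to the sparse-annotated __poll_t type with EPOLL* constants, whose values are fixed across architectures (some legacy POLL* flags are not). A minimal sketch of a new-style callback, with an illustrative name:

    #include <linux/poll.h>

    static __poll_t example_poll(struct file *file, struct socket *sock,
                                 poll_table *wait)
    {
            __poll_t mask = 0;      /* __poll_t, not unsigned int */

            /* EPOLL* constants carry the __poll_t annotation */
            mask |= EPOLLIN | EPOLLRDNORM;
            return mask;
    }
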
@@ -766,43 +766,39 @@ static int __init bt_init(void)
766 return err; 766 return err;
767 767
768 err = sock_register(&bt_sock_family_ops); 768 err = sock_register(&bt_sock_family_ops);
769 if (err < 0) { 769 if (err)
770 bt_sysfs_cleanup(); 770 goto cleanup_sysfs;
771 return err;
772 }
773 771
774 BT_INFO("HCI device and connection manager initialized"); 772 BT_INFO("HCI device and connection manager initialized");
775 773
776 err = hci_sock_init(); 774 err = hci_sock_init();
777 if (err < 0) 775 if (err)
778 goto error; 776 goto unregister_socket;
779 777
780 err = l2cap_init(); 778 err = l2cap_init();
781 if (err < 0) 779 if (err)
782 goto sock_err; 780 goto cleanup_socket;
783 781
784 err = sco_init(); 782 err = sco_init();
785 if (err < 0) { 783 if (err)
786 l2cap_exit(); 784 goto cleanup_cap;
787 goto sock_err;
788 }
789 785
790 err = mgmt_init(); 786 err = mgmt_init();
791 if (err < 0) { 787 if (err)
792 sco_exit(); 788 goto cleanup_sco;
793 l2cap_exit();
794 goto sock_err;
795 }
796 789
797 return 0; 790 return 0;
798 791
799sock_err: 792cleanup_sco:
793 sco_exit();
794cleanup_cap:
795 l2cap_exit();
796cleanup_socket:
800 hci_sock_cleanup(); 797 hci_sock_cleanup();
801 798unregister_socket:
802error:
803 sock_unregister(PF_BLUETOOTH); 799 sock_unregister(PF_BLUETOOTH);
800cleanup_sysfs:
804 bt_sysfs_cleanup(); 801 bt_sysfs_cleanup();
805
806 return err; 802 return err;
807} 803}
808 804
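
The bt_init() rework above replaces duplicated inline rollback calls with the conventional goto ladder: each failure jumps to a label that unwinds everything initialized so far, in reverse order. A generic sketch of the pattern with placeholder functions:

    static int example_init(void)
    {
            int err;

            err = setup_a();
            if (err)
                    return err;

            err = setup_b();
            if (err)
                    goto cleanup_a;

            err = setup_c();
            if (err)
                    goto cleanup_b;

            return 0;

    cleanup_b:
            teardown_b();
    cleanup_a:
            teardown_a();
            return err;
    }
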
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index bb308224099c..426a92f02db4 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -527,7 +527,6 @@ static int cmtp_proc_open(struct inode *inode, struct file *file)
527} 527}
528 528
529static const struct file_operations cmtp_proc_fops = { 529static const struct file_operations cmtp_proc_fops = {
530 .owner = THIS_MODULE,
531 .open = cmtp_proc_open, 530 .open = cmtp_proc_open,
532 .read = seq_read, 531 .read = seq_read,
533 .llseek = seq_lseek, 532 .llseek = seq_lseek,
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 63df63ebfb24..418b76e557b0 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -88,17 +88,7 @@ static int __name ## _show(struct seq_file *f, void *ptr) \
88 return 0; \ 88 return 0; \
89} \ 89} \
90 \ 90 \
91static int __name ## _open(struct inode *inode, struct file *file) \ 91DEFINE_SHOW_ATTRIBUTE(__name)
92{ \
93 return single_open(file, __name ## _show, inode->i_private); \
94} \
95 \
96static const struct file_operations __name ## _fops = { \
97 .open = __name ## _open, \
98 .read = seq_read, \
99 .llseek = seq_lseek, \
100 .release = single_release, \
101} \
102 92
103static int features_show(struct seq_file *f, void *ptr) 93static int features_show(struct seq_file *f, void *ptr)
104{ 94{
@@ -106,37 +96,16 @@ static int features_show(struct seq_file *f, void *ptr)
106 u8 p; 96 u8 p;
107 97
108 hci_dev_lock(hdev); 98 hci_dev_lock(hdev);
109 for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { 99 for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++)
110 seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " 100 seq_printf(f, "%2u: %8ph\n", p, hdev->features[p]);
111 "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p,
112 hdev->features[p][0], hdev->features[p][1],
113 hdev->features[p][2], hdev->features[p][3],
114 hdev->features[p][4], hdev->features[p][5],
115 hdev->features[p][6], hdev->features[p][7]);
116 }
117 if (lmp_le_capable(hdev)) 101 if (lmp_le_capable(hdev))
118 seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " 102 seq_printf(f, "LE: %8ph\n", hdev->le_features);
119 "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n",
120 hdev->le_features[0], hdev->le_features[1],
121 hdev->le_features[2], hdev->le_features[3],
122 hdev->le_features[4], hdev->le_features[5],
123 hdev->le_features[6], hdev->le_features[7]);
124 hci_dev_unlock(hdev); 103 hci_dev_unlock(hdev);
125 104
126 return 0; 105 return 0;
127} 106}
128 107
129static int features_open(struct inode *inode, struct file *file) 108DEFINE_SHOW_ATTRIBUTE(features);
130{
131 return single_open(file, features_show, inode->i_private);
132}
133
134static const struct file_operations features_fops = {
135 .open = features_open,
136 .read = seq_read,
137 .llseek = seq_lseek,
138 .release = single_release,
139};
140 109
141static int device_id_show(struct seq_file *f, void *ptr) 110static int device_id_show(struct seq_file *f, void *ptr)
142{ 111{
@@ -150,17 +119,7 @@ static int device_id_show(struct seq_file *f, void *ptr)
150 return 0; 119 return 0;
151} 120}
152 121
153static int device_id_open(struct inode *inode, struct file *file) 122DEFINE_SHOW_ATTRIBUTE(device_id);
154{
155 return single_open(file, device_id_show, inode->i_private);
156}
157
158static const struct file_operations device_id_fops = {
159 .open = device_id_open,
160 .read = seq_read,
161 .llseek = seq_lseek,
162 .release = single_release,
163};
164 123
165static int device_list_show(struct seq_file *f, void *ptr) 124static int device_list_show(struct seq_file *f, void *ptr)
166{ 125{
@@ -180,17 +139,7 @@ static int device_list_show(struct seq_file *f, void *ptr)
180 return 0; 139 return 0;
181} 140}
182 141
183static int device_list_open(struct inode *inode, struct file *file) 142DEFINE_SHOW_ATTRIBUTE(device_list);
184{
185 return single_open(file, device_list_show, inode->i_private);
186}
187
188static const struct file_operations device_list_fops = {
189 .open = device_list_open,
190 .read = seq_read,
191 .llseek = seq_lseek,
192 .release = single_release,
193};
194 143
195static int blacklist_show(struct seq_file *f, void *p) 144static int blacklist_show(struct seq_file *f, void *p)
196{ 145{
@@ -205,17 +154,7 @@ static int blacklist_show(struct seq_file *f, void *p)
205 return 0; 154 return 0;
206} 155}
207 156
208static int blacklist_open(struct inode *inode, struct file *file) 157DEFINE_SHOW_ATTRIBUTE(blacklist);
209{
210 return single_open(file, blacklist_show, inode->i_private);
211}
212
213static const struct file_operations blacklist_fops = {
214 .open = blacklist_open,
215 .read = seq_read,
216 .llseek = seq_lseek,
217 .release = single_release,
218};
219 158
220static int uuids_show(struct seq_file *f, void *p) 159static int uuids_show(struct seq_file *f, void *p)
221{ 160{
@@ -240,17 +179,7 @@ static int uuids_show(struct seq_file *f, void *p)
240 return 0; 179 return 0;
241} 180}
242 181
243static int uuids_open(struct inode *inode, struct file *file) 182DEFINE_SHOW_ATTRIBUTE(uuids);
244{
245 return single_open(file, uuids_show, inode->i_private);
246}
247
248static const struct file_operations uuids_fops = {
249 .open = uuids_open,
250 .read = seq_read,
251 .llseek = seq_lseek,
252 .release = single_release,
253};
254 183
255static int remote_oob_show(struct seq_file *f, void *ptr) 184static int remote_oob_show(struct seq_file *f, void *ptr)
256{ 185{
@@ -269,17 +198,7 @@ static int remote_oob_show(struct seq_file *f, void *ptr)
269 return 0; 198 return 0;
270} 199}
271 200
272static int remote_oob_open(struct inode *inode, struct file *file) 201DEFINE_SHOW_ATTRIBUTE(remote_oob);
273{
274 return single_open(file, remote_oob_show, inode->i_private);
275}
276
277static const struct file_operations remote_oob_fops = {
278 .open = remote_oob_open,
279 .read = seq_read,
280 .llseek = seq_lseek,
281 .release = single_release,
282};
283 202
284static int conn_info_min_age_set(void *data, u64 val) 203static int conn_info_min_age_set(void *data, u64 val)
285{ 204{
@@ -443,17 +362,7 @@ static int inquiry_cache_show(struct seq_file *f, void *p)
443 return 0; 362 return 0;
444} 363}
445 364
446static int inquiry_cache_open(struct inode *inode, struct file *file) 365DEFINE_SHOW_ATTRIBUTE(inquiry_cache);
447{
448 return single_open(file, inquiry_cache_show, inode->i_private);
449}
450
451static const struct file_operations inquiry_cache_fops = {
452 .open = inquiry_cache_open,
453 .read = seq_read,
454 .llseek = seq_lseek,
455 .release = single_release,
456};
457 366
458static int link_keys_show(struct seq_file *f, void *ptr) 367static int link_keys_show(struct seq_file *f, void *ptr)
459{ 368{
@@ -469,17 +378,7 @@ static int link_keys_show(struct seq_file *f, void *ptr)
469 return 0; 378 return 0;
470} 379}
471 380
472static int link_keys_open(struct inode *inode, struct file *file) 381DEFINE_SHOW_ATTRIBUTE(link_keys);
473{
474 return single_open(file, link_keys_show, inode->i_private);
475}
476
477static const struct file_operations link_keys_fops = {
478 .open = link_keys_open,
479 .read = seq_read,
480 .llseek = seq_lseek,
481 .release = single_release,
482};
483 382
484static int dev_class_show(struct seq_file *f, void *ptr) 383static int dev_class_show(struct seq_file *f, void *ptr)
485{ 384{
@@ -493,17 +392,7 @@ static int dev_class_show(struct seq_file *f, void *ptr)
493 return 0; 392 return 0;
494} 393}
495 394
496static int dev_class_open(struct inode *inode, struct file *file) 395DEFINE_SHOW_ATTRIBUTE(dev_class);
497{
498 return single_open(file, dev_class_show, inode->i_private);
499}
500
501static const struct file_operations dev_class_fops = {
502 .open = dev_class_open,
503 .read = seq_read,
504 .llseek = seq_lseek,
505 .release = single_release,
506};
507 396
508static int voice_setting_get(void *data, u64 *val) 397static int voice_setting_get(void *data, u64 *val)
509{ 398{
@@ -692,17 +581,7 @@ static int identity_show(struct seq_file *f, void *p)
692 return 0; 581 return 0;
693} 582}
694 583
695static int identity_open(struct inode *inode, struct file *file) 584DEFINE_SHOW_ATTRIBUTE(identity);
696{
697 return single_open(file, identity_show, inode->i_private);
698}
699
700static const struct file_operations identity_fops = {
701 .open = identity_open,
702 .read = seq_read,
703 .llseek = seq_lseek,
704 .release = single_release,
705};
706 585
707static int rpa_timeout_set(void *data, u64 val) 586static int rpa_timeout_set(void *data, u64 val)
708{ 587{
@@ -746,17 +625,7 @@ static int random_address_show(struct seq_file *f, void *p)
746 return 0; 625 return 0;
747} 626}
748 627
749static int random_address_open(struct inode *inode, struct file *file) 628DEFINE_SHOW_ATTRIBUTE(random_address);
750{
751 return single_open(file, random_address_show, inode->i_private);
752}
753
754static const struct file_operations random_address_fops = {
755 .open = random_address_open,
756 .read = seq_read,
757 .llseek = seq_lseek,
758 .release = single_release,
759};
760 629
761static int static_address_show(struct seq_file *f, void *p) 630static int static_address_show(struct seq_file *f, void *p)
762{ 631{
@@ -769,17 +638,7 @@ static int static_address_show(struct seq_file *f, void *p)
769 return 0; 638 return 0;
770} 639}
771 640
772static int static_address_open(struct inode *inode, struct file *file) 641DEFINE_SHOW_ATTRIBUTE(static_address);
773{
774 return single_open(file, static_address_show, inode->i_private);
775}
776
777static const struct file_operations static_address_fops = {
778 .open = static_address_open,
779 .read = seq_read,
780 .llseek = seq_lseek,
781 .release = single_release,
782};
783 642
784static ssize_t force_static_address_read(struct file *file, 643static ssize_t force_static_address_read(struct file *file,
785 char __user *user_buf, 644 char __user *user_buf,
@@ -841,17 +700,7 @@ static int white_list_show(struct seq_file *f, void *ptr)
841 return 0; 700 return 0;
842} 701}
843 702
844static int white_list_open(struct inode *inode, struct file *file) 703DEFINE_SHOW_ATTRIBUTE(white_list);
845{
846 return single_open(file, white_list_show, inode->i_private);
847}
848
849static const struct file_operations white_list_fops = {
850 .open = white_list_open,
851 .read = seq_read,
852 .llseek = seq_lseek,
853 .release = single_release,
854};
855 704
856static int identity_resolving_keys_show(struct seq_file *f, void *ptr) 705static int identity_resolving_keys_show(struct seq_file *f, void *ptr)
857{ 706{
@@ -869,18 +718,7 @@ static int identity_resolving_keys_show(struct seq_file *f, void *ptr)
869 return 0; 718 return 0;
870} 719}
871 720
872static int identity_resolving_keys_open(struct inode *inode, struct file *file) 721DEFINE_SHOW_ATTRIBUTE(identity_resolving_keys);
873{
874 return single_open(file, identity_resolving_keys_show,
875 inode->i_private);
876}
877
878static const struct file_operations identity_resolving_keys_fops = {
879 .open = identity_resolving_keys_open,
880 .read = seq_read,
881 .llseek = seq_lseek,
882 .release = single_release,
883};
884 722
885static int long_term_keys_show(struct seq_file *f, void *ptr) 723static int long_term_keys_show(struct seq_file *f, void *ptr)
886{ 724{
@@ -898,17 +736,7 @@ static int long_term_keys_show(struct seq_file *f, void *ptr)
898 return 0; 736 return 0;
899} 737}
900 738
901static int long_term_keys_open(struct inode *inode, struct file *file) 739DEFINE_SHOW_ATTRIBUTE(long_term_keys);
902{
903 return single_open(file, long_term_keys_show, inode->i_private);
904}
905
906static const struct file_operations long_term_keys_fops = {
907 .open = long_term_keys_open,
908 .read = seq_read,
909 .llseek = seq_lseek,
910 .release = single_release,
911};
912 740
913static int conn_min_interval_set(void *data, u64 val) 741static int conn_min_interval_set(void *data, u64 val)
914{ 742{
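
DEFINE_SHOW_ATTRIBUTE(), from <linux/seq_file.h>, generates exactly the boilerplate deleted above: given foo_show(), it defines foo_open() via single_open() and a matching foo_fops. A minimal sketch of its use for a debugfs file (names are illustrative):

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    static int example_show(struct seq_file *f, void *ptr)
    {
            seq_puts(f, "hello\n");
            return 0;
    }

    /* defines example_open() and example_fops */
    DEFINE_SHOW_ATTRIBUTE(example);

    /* later, with 'parent' being an existing debugfs directory:
     *      debugfs_create_file("example", 0444, parent, NULL, &example_fops);
     */
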
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index abc0f3224dd1..3394e6791673 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -919,6 +919,43 @@ static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
919 return true; 919 return true;
920} 920}
921 921
922static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
923{
924 /* If there is no connection we are OK to advertise. */
925 if (hci_conn_num(hdev, LE_LINK) == 0)
926 return true;
927
928 /* Check le_states if there is any connection in slave role. */
929 if (hdev->conn_hash.le_num_slave > 0) {
930 /* Slave connection state and non connectable mode bit 20. */
931 if (!connectable && !(hdev->le_states[2] & 0x10))
932 return false;
933
934 /* Slave connection state and connectable mode bit 38
935 * and scannable bit 21.
936 */
937 if (connectable && (!(hdev->le_states[4] & 0x01) ||
938 !(hdev->le_states[2] & 0x40)))
939 return false;
940 }
941
942 /* Check le_states if there is any connection in master role. */
943 if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_slave) {
944 /* Master connection state and non connectable mode bit 18. */
945 if (!connectable && !(hdev->le_states[2] & 0x02))
946 return false;
947
948 /* Master connection state and connectable mode bit 35 and
949 * scannable 19.
950 */
951 if (connectable && (!(hdev->le_states[4] & 0x10) ||
952 !(hdev->le_states[2] & 0x08)))
953 return false;
954 }
955
956 return true;
957}
958
922void __hci_req_enable_advertising(struct hci_request *req) 959void __hci_req_enable_advertising(struct hci_request *req)
923{ 960{
924 struct hci_dev *hdev = req->hdev; 961 struct hci_dev *hdev = req->hdev;
@@ -927,7 +964,15 @@ void __hci_req_enable_advertising(struct hci_request *req)
927 bool connectable; 964 bool connectable;
928 u32 flags; 965 u32 flags;
929 966
930 if (hci_conn_num(hdev, LE_LINK) > 0) 967 flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
968
969 /* If the "connectable" instance flag was not set, then choose between
970 * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
971 */
972 connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
973 mgmt_get_connectable(hdev);
974
975 if (!is_advertising_allowed(hdev, connectable))
931 return; 976 return;
932 977
933 if (hci_dev_test_flag(hdev, HCI_LE_ADV)) 978 if (hci_dev_test_flag(hdev, HCI_LE_ADV))
@@ -940,14 +985,6 @@ void __hci_req_enable_advertising(struct hci_request *req)
940 */ 985 */
941 hci_dev_clear_flag(hdev, HCI_LE_ADV); 986 hci_dev_clear_flag(hdev, HCI_LE_ADV);
942 987
943 flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
944
945 /* If the "connectable" instance flag was not set, then choose between
946 * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
947 */
948 connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
949 mgmt_get_connectable(hdev);
950
951 /* Set require_privacy to true only when non-connectable 988 /* Set require_privacy to true only when non-connectable
952 * advertising is used. In that case it is fine to use a 989 * advertising is used. In that case it is fine to use a
953 * non-resolvable private address. 990 * non-resolvable private address.
@@ -1985,13 +2022,6 @@ unlock:
1985 hci_dev_unlock(hdev); 2022 hci_dev_unlock(hdev);
1986} 2023}
1987 2024
1988static void disable_advertising(struct hci_request *req)
1989{
1990 u8 enable = 0x00;
1991
1992 hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
1993}
1994
1995static int active_scan(struct hci_request *req, unsigned long opt) 2025static int active_scan(struct hci_request *req, unsigned long opt)
1996{ 2026{
1997 uint16_t interval = opt; 2027 uint16_t interval = opt;
@@ -2017,7 +2047,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
2017 cancel_adv_timeout(hdev); 2047 cancel_adv_timeout(hdev);
2018 hci_dev_unlock(hdev); 2048 hci_dev_unlock(hdev);
2019 2049
2020 disable_advertising(req); 2050 __hci_req_disable_advertising(req);
2021 } 2051 }
2022 2052
2023 /* If controller is scanning, it means the background scanning is 2053 /* If controller is scanning, it means the background scanning is
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f2cec70d520c..1036e4fa1ea2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -789,7 +789,7 @@ static int hidp_setup_hid(struct hidp_session *session,
789 hid->dev.parent = &session->conn->hcon->dev; 789 hid->dev.parent = &session->conn->hcon->dev;
790 hid->ll_driver = &hidp_hid_driver; 790 hid->ll_driver = &hidp_hid_driver;
791 791
792 /* True if device is blacklisted in drivers/hid/hid-core.c */ 792 /* True if device is blacklisted in drivers/hid/hid-quirks.c */
793 if (hid_ignore(hid)) { 793 if (hid_ignore(hid)) {
794 hid_destroy_device(session->hid); 794 hid_destroy_device(session->hid);
795 session->hid = NULL; 795 session->hid = NULL;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a86e6687026e..2ced48662c1f 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -151,6 +151,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
151{ 151{
152 u32 size = kattr->test.data_size_in; 152 u32 size = kattr->test.data_size_in;
153 u32 repeat = kattr->test.repeat; 153 u32 repeat = kattr->test.repeat;
154 struct netdev_rx_queue *rxqueue;
154 struct xdp_buff xdp = {}; 155 struct xdp_buff xdp = {};
155 u32 retval, duration; 156 u32 retval, duration;
156 void *data; 157 void *data;
@@ -165,6 +166,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
165 xdp.data_meta = xdp.data; 166 xdp.data_meta = xdp.data;
166 xdp.data_end = xdp.data + size; 167 xdp.data_end = xdp.data + size;
167 168
169 rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
170 xdp.rxq = &rxqueue->xdp_rxq;
171
168 retval = bpf_test_run(prog, &xdp, repeat, &duration); 172 retval = bpf_test_run(prog, &xdp, repeat, &duration);
169 if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN) 173 if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
170 size = xdp.data_end - xdp.data; 174 size = xdp.data_end - xdp.data;
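
The test_run change supplies a real rx queue because XDP programs may read the ingress queue number through their context; struct xdp_md gained an rx_queue_index field in this same cycle, and dereferencing it is rewritten into a load through xdp.rxq. A hedged sketch of a program touching the field (illustrative; the libbpf section annotations needed for loading are omitted):

    #include <linux/bpf.h>

    int example_xdp_prog(struct xdp_md *ctx)
    {
            /* served from xdp.rxq, hence the initialization added above */
            if (ctx->rx_queue_index != 0)
                    return XDP_DROP;
            return XDP_PASS;
    }
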
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index af5b8c87f590..1285ca30ab0a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -125,9 +125,16 @@ static int br_dev_init(struct net_device *dev)
125 if (!br->stats) 125 if (!br->stats)
126 return -ENOMEM; 126 return -ENOMEM;
127 127
128 err = br_fdb_hash_init(br);
129 if (err) {
130 free_percpu(br->stats);
131 return err;
132 }
133
128 err = br_vlan_init(br); 134 err = br_vlan_init(br);
129 if (err) { 135 if (err) {
130 free_percpu(br->stats); 136 free_percpu(br->stats);
137 br_fdb_hash_fini(br);
131 return err; 138 return err;
132 } 139 }
133 140
@@ -135,6 +142,7 @@ static int br_dev_init(struct net_device *dev)
135 if (err) { 142 if (err) {
136 free_percpu(br->stats); 143 free_percpu(br->stats);
137 br_vlan_flush(br); 144 br_vlan_flush(br);
145 br_fdb_hash_fini(br);
138 } 146 }
139 br_set_lockdep_class(dev); 147 br_set_lockdep_class(dev);
140 148
@@ -148,6 +156,7 @@ static void br_dev_uninit(struct net_device *dev)
148 br_multicast_dev_del(br); 156 br_multicast_dev_del(br);
149 br_multicast_uninit_stats(br); 157 br_multicast_uninit_stats(br);
150 br_vlan_flush(br); 158 br_vlan_flush(br);
159 br_fdb_hash_fini(br);
151 free_percpu(br->stats); 160 free_percpu(br->stats);
152} 161}
153 162
@@ -416,6 +425,7 @@ void br_dev_setup(struct net_device *dev)
416 br->dev = dev; 425 br->dev = dev;
417 spin_lock_init(&br->lock); 426 spin_lock_init(&br->lock);
418 INIT_LIST_HEAD(&br->port_list); 427 INIT_LIST_HEAD(&br->port_list);
428 INIT_HLIST_HEAD(&br->fdb_list);
419 spin_lock_init(&br->hash_lock); 429 spin_lock_init(&br->hash_lock);
420 430
421 br->bridge_id.prio[0] = 0x80; 431 br->bridge_id.prio[0] = 0x80;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4ea5c8bbe286..d9e69e4514be 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,14 +28,20 @@
28#include <trace/events/bridge.h> 28#include <trace/events/bridge.h>
29#include "br_private.h" 29#include "br_private.h"
30 30
31static const struct rhashtable_params br_fdb_rht_params = {
32 .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode),
33 .key_offset = offsetof(struct net_bridge_fdb_entry, key),
34 .key_len = sizeof(struct net_bridge_fdb_key),
35 .automatic_shrinking = true,
36 .locks_mul = 1,
37};
38
31static struct kmem_cache *br_fdb_cache __read_mostly; 39static struct kmem_cache *br_fdb_cache __read_mostly;
32static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 40static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
33 const unsigned char *addr, u16 vid); 41 const unsigned char *addr, u16 vid);
34static void fdb_notify(struct net_bridge *br, 42static void fdb_notify(struct net_bridge *br,
35 const struct net_bridge_fdb_entry *, int); 43 const struct net_bridge_fdb_entry *, int);
36 44
37static u32 fdb_salt __read_mostly;
38
39int __init br_fdb_init(void) 45int __init br_fdb_init(void)
40{ 46{
41 br_fdb_cache = kmem_cache_create("bridge_fdb_cache", 47 br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
@@ -45,7 +51,6 @@ int __init br_fdb_init(void)
45 if (!br_fdb_cache) 51 if (!br_fdb_cache)
46 return -ENOMEM; 52 return -ENOMEM;
47 53
48 get_random_bytes(&fdb_salt, sizeof(fdb_salt));
49 return 0; 54 return 0;
50} 55}
51 56
@@ -54,6 +59,15 @@ void br_fdb_fini(void)
54 kmem_cache_destroy(br_fdb_cache); 59 kmem_cache_destroy(br_fdb_cache);
55} 60}
56 61
62int br_fdb_hash_init(struct net_bridge *br)
63{
64 return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params);
65}
66
67void br_fdb_hash_fini(struct net_bridge *br)
68{
69 rhashtable_destroy(&br->fdb_hash_tbl);
70}
57 71
58/* if topology_changing then use forward_delay (default 15 sec) 72/* if topology_changing then use forward_delay (default 15 sec)
59 * otherwise keep longer (default 5 minutes) 73 * otherwise keep longer (default 5 minutes)
@@ -70,13 +84,6 @@ static inline int has_expired(const struct net_bridge *br,
70 time_before_eq(fdb->updated + hold_time(br), jiffies); 84 time_before_eq(fdb->updated + hold_time(br), jiffies);
71} 85}
72 86
73static inline int br_mac_hash(const unsigned char *mac, __u16 vid)
74{
75 /* use 1 byte of OUI and 3 bytes of NIC */
76 u32 key = get_unaligned((u32 *)(mac + 2));
77 return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1);
78}
79
80static void fdb_rcu_free(struct rcu_head *head) 87static void fdb_rcu_free(struct rcu_head *head)
81{ 88{
82 struct net_bridge_fdb_entry *ent 89 struct net_bridge_fdb_entry *ent
@@ -84,19 +91,18 @@ static void fdb_rcu_free(struct rcu_head *head)
84 kmem_cache_free(br_fdb_cache, ent); 91 kmem_cache_free(br_fdb_cache, ent);
85} 92}
86 93
87static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, 94static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
88 const unsigned char *addr, 95 const unsigned char *addr,
89 __u16 vid) 96 __u16 vid)
90{ 97{
91 struct net_bridge_fdb_entry *f; 98 struct net_bridge_fdb_key key;
92 99
93 WARN_ON_ONCE(!rcu_read_lock_held()); 100 WARN_ON_ONCE(!rcu_read_lock_held());
94 101
95 hlist_for_each_entry_rcu(f, head, hlist) 102 key.vlan_id = vid;
96 if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid) 103 memcpy(key.addr.addr, addr, sizeof(key.addr.addr));
97 break;
98 104
99 return f; 105 return rhashtable_lookup(tbl, &key, br_fdb_rht_params);
100} 106}
101 107
102/* requires bridge hash_lock */ 108/* requires bridge hash_lock */
@@ -104,13 +110,12 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
104 const unsigned char *addr, 110 const unsigned char *addr,
105 __u16 vid) 111 __u16 vid)
106{ 112{
107 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
108 struct net_bridge_fdb_entry *fdb; 113 struct net_bridge_fdb_entry *fdb;
109 114
110 lockdep_assert_held_once(&br->hash_lock); 115 lockdep_assert_held_once(&br->hash_lock);
111 116
112 rcu_read_lock(); 117 rcu_read_lock();
113 fdb = fdb_find_rcu(head, addr, vid); 118 fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
114 rcu_read_unlock(); 119 rcu_read_unlock();
115 120
116 return fdb; 121 return fdb;
@@ -120,9 +125,7 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
120 const unsigned char *addr, 125 const unsigned char *addr,
121 __u16 vid) 126 __u16 vid)
122{ 127{
123 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 128 return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
124
125 return fdb_find_rcu(head, addr, vid);
126} 129}
127 130
128/* When a static FDB entry is added, the mac address from the entry is 131/* When a static FDB entry is added, the mac address from the entry is
@@ -175,9 +178,11 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
175 trace_fdb_delete(br, f); 178 trace_fdb_delete(br, f);
176 179
177 if (f->is_static) 180 if (f->is_static)
178 fdb_del_hw_addr(br, f->addr.addr); 181 fdb_del_hw_addr(br, f->key.addr.addr);
179 182
180 hlist_del_init_rcu(&f->hlist); 183 hlist_del_init_rcu(&f->fdb_node);
184 rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
185 br_fdb_rht_params);
181 fdb_notify(br, f, RTM_DELNEIGH); 186 fdb_notify(br, f, RTM_DELNEIGH);
182 call_rcu(&f->rcu, fdb_rcu_free); 187 call_rcu(&f->rcu, fdb_rcu_free);
183} 188}
@@ -187,11 +192,11 @@ static void fdb_delete_local(struct net_bridge *br,
187 const struct net_bridge_port *p, 192 const struct net_bridge_port *p,
188 struct net_bridge_fdb_entry *f) 193 struct net_bridge_fdb_entry *f)
189{ 194{
190 const unsigned char *addr = f->addr.addr; 195 const unsigned char *addr = f->key.addr.addr;
191 struct net_bridge_vlan_group *vg; 196 struct net_bridge_vlan_group *vg;
192 const struct net_bridge_vlan *v; 197 const struct net_bridge_vlan *v;
193 struct net_bridge_port *op; 198 struct net_bridge_port *op;
194 u16 vid = f->vlan_id; 199 u16 vid = f->key.vlan_id;
195 200
196 /* Maybe another port has same hw addr? */ 201 /* Maybe another port has same hw addr? */
197 list_for_each_entry(op, &br->port_list, list) { 202 list_for_each_entry(op, &br->port_list, list) {
@@ -233,31 +238,23 @@ void br_fdb_find_delete_local(struct net_bridge *br,
233void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) 238void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
234{ 239{
235 struct net_bridge_vlan_group *vg; 240 struct net_bridge_vlan_group *vg;
241 struct net_bridge_fdb_entry *f;
236 struct net_bridge *br = p->br; 242 struct net_bridge *br = p->br;
237 struct net_bridge_vlan *v; 243 struct net_bridge_vlan *v;
238 int i;
239 244
240 spin_lock_bh(&br->hash_lock); 245 spin_lock_bh(&br->hash_lock);
241
242 vg = nbp_vlan_group(p); 246 vg = nbp_vlan_group(p);
243 /* Search all chains since old address/hash is unknown */ 247 hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
244 for (i = 0; i < BR_HASH_SIZE; i++) { 248 if (f->dst == p && f->is_local && !f->added_by_user) {
245 struct hlist_node *h; 249 /* delete old one */
246 hlist_for_each(h, &br->hash[i]) { 250 fdb_delete_local(br, p, f);
247 struct net_bridge_fdb_entry *f; 251
248 252 /* if this port has no vlan information
249 f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); 253 * configured, we can safely be done at
250 if (f->dst == p && f->is_local && !f->added_by_user) { 254 * this point.
251 /* delete old one */ 255 */
252 fdb_delete_local(br, p, f); 256 if (!vg || !vg->num_vlans)
253 257 goto insert;
254 /* if this port has no vlan information
255 * configured, we can safely be done at
256 * this point.
257 */
258 if (!vg || !vg->num_vlans)
259 goto insert;
260 }
261 } 258 }
262 } 259 }
263 260
@@ -316,35 +313,32 @@ void br_fdb_cleanup(struct work_struct *work)
316{ 313{
317 struct net_bridge *br = container_of(work, struct net_bridge, 314 struct net_bridge *br = container_of(work, struct net_bridge,
318 gc_work.work); 315 gc_work.work);
316 struct net_bridge_fdb_entry *f = NULL;
319 unsigned long delay = hold_time(br); 317 unsigned long delay = hold_time(br);
320 unsigned long work_delay = delay; 318 unsigned long work_delay = delay;
321 unsigned long now = jiffies; 319 unsigned long now = jiffies;
322 int i;
323 320
324 for (i = 0; i < BR_HASH_SIZE; i++) { 321 /* this part is tricky, in order to avoid blocking learning and
325 struct net_bridge_fdb_entry *f; 322 * consequently forwarding, we rely on rcu to delete objects with
326 struct hlist_node *n; 323 * delayed freeing allowing us to continue traversing
324 */
325 rcu_read_lock();
326 hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
327 unsigned long this_timer;
327 328
328 if (!br->hash[i].first) 329 if (f->is_static || f->added_by_external_learn)
329 continue; 330 continue;
330 331 this_timer = f->updated + delay;
331 spin_lock_bh(&br->hash_lock); 332 if (time_after(this_timer, now)) {
332 hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { 333 work_delay = min(work_delay, this_timer - now);
333 unsigned long this_timer; 334 } else {
334 335 spin_lock_bh(&br->hash_lock);
335 if (f->is_static) 336 if (!hlist_unhashed(&f->fdb_node))
336 continue;
337 if (f->added_by_external_learn)
338 continue;
339 this_timer = f->updated + delay;
340 if (time_after(this_timer, now))
341 work_delay = min(work_delay, this_timer - now);
342 else
343 fdb_delete(br, f); 337 fdb_delete(br, f);
338 spin_unlock_bh(&br->hash_lock);
344 } 339 }
345 spin_unlock_bh(&br->hash_lock);
346 cond_resched();
347 } 340 }
341 rcu_read_unlock();
348 342
349 /* Cleanup minimum 10 milliseconds apart */ 343 /* Cleanup minimum 10 milliseconds apart */
350 work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); 344 work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
@@ -354,16 +348,13 @@ void br_fdb_cleanup(struct work_struct *work)
354/* Completely flush all dynamic entries in forwarding database.*/ 348/* Completely flush all dynamic entries in forwarding database.*/
355void br_fdb_flush(struct net_bridge *br) 349void br_fdb_flush(struct net_bridge *br)
356{ 350{
357 int i; 351 struct net_bridge_fdb_entry *f;
352 struct hlist_node *tmp;
358 353
359 spin_lock_bh(&br->hash_lock); 354 spin_lock_bh(&br->hash_lock);
360 for (i = 0; i < BR_HASH_SIZE; i++) { 355 hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
361 struct net_bridge_fdb_entry *f; 356 if (!f->is_static)
362 struct hlist_node *n; 357 fdb_delete(br, f);
363 hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
364 if (!f->is_static)
365 fdb_delete(br, f);
366 }
367 } 358 }
368 spin_unlock_bh(&br->hash_lock); 359 spin_unlock_bh(&br->hash_lock);
369} 360}
@@ -377,27 +368,22 @@ void br_fdb_delete_by_port(struct net_bridge *br,
377 u16 vid, 368 u16 vid,
378 int do_all) 369 int do_all)
379{ 370{
380 int i; 371 struct net_bridge_fdb_entry *f;
372 struct hlist_node *tmp;
381 373
382 spin_lock_bh(&br->hash_lock); 374 spin_lock_bh(&br->hash_lock);
383 for (i = 0; i < BR_HASH_SIZE; i++) { 375 hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
384 struct hlist_node *h, *g; 376 if (f->dst != p)
377 continue;
385 378
386 hlist_for_each_safe(h, g, &br->hash[i]) { 379 if (!do_all)
387 struct net_bridge_fdb_entry *f 380 if (f->is_static || (vid && f->key.vlan_id != vid))
388 = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
389 if (f->dst != p)
390 continue; 381 continue;
391 382
392 if (!do_all) 383 if (f->is_local)
393 if (f->is_static || (vid && f->vlan_id != vid)) 384 fdb_delete_local(br, p, f);
394 continue; 385 else
395 386 fdb_delete(br, f);
396 if (f->is_local)
397 fdb_delete_local(br, p, f);
398 else
399 fdb_delete(br, f);
400 }
401 } 387 }
402 spin_unlock_bh(&br->hash_lock); 388 spin_unlock_bh(&br->hash_lock);
403} 389}
@@ -433,52 +419,48 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
433int br_fdb_fillbuf(struct net_bridge *br, void *buf, 419int br_fdb_fillbuf(struct net_bridge *br, void *buf,
434 unsigned long maxnum, unsigned long skip) 420 unsigned long maxnum, unsigned long skip)
435{ 421{
436 struct __fdb_entry *fe = buf;
437 int i, num = 0;
438 struct net_bridge_fdb_entry *f; 422 struct net_bridge_fdb_entry *f;
423 struct __fdb_entry *fe = buf;
424 int num = 0;
439 425
440 memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); 426 memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
441 427
442 rcu_read_lock(); 428 rcu_read_lock();
443 for (i = 0; i < BR_HASH_SIZE; i++) { 429 hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
444 hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { 430 if (num >= maxnum)
445 if (num >= maxnum) 431 break;
446 goto out;
447 432
448 if (has_expired(br, f)) 433 if (has_expired(br, f))
449 continue; 434 continue;
450 435
451 /* ignore pseudo entry for local MAC address */ 436 /* ignore pseudo entry for local MAC address */
452 if (!f->dst) 437 if (!f->dst)
453 continue; 438 continue;
454 439
455 if (skip) { 440 if (skip) {
456 --skip; 441 --skip;
457 continue; 442 continue;
458 } 443 }
459 444
460 /* convert from internal format to API */ 445 /* convert from internal format to API */
461 memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN); 446 memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN);
462 447
463 /* due to ABI compat need to split into hi/lo */ 448 /* due to ABI compat need to split into hi/lo */
464 fe->port_no = f->dst->port_no; 449 fe->port_no = f->dst->port_no;
465 fe->port_hi = f->dst->port_no >> 8; 450 fe->port_hi = f->dst->port_no >> 8;
466 451
467 fe->is_local = f->is_local; 452 fe->is_local = f->is_local;
468 if (!f->is_static) 453 if (!f->is_static)
469 fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); 454 fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
470 ++fe; 455 ++fe;
471 ++num; 456 ++num;
472 }
473 } 457 }
474
475 out:
476 rcu_read_unlock(); 458 rcu_read_unlock();
477 459
478 return num; 460 return num;
479} 461}
480 462
481static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, 463static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
482 struct net_bridge_port *source, 464 struct net_bridge_port *source,
483 const unsigned char *addr, 465 const unsigned char *addr,
484 __u16 vid, 466 __u16 vid,
@@ -489,16 +471,23 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
489 471
490 fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); 472 fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
491 if (fdb) { 473 if (fdb) {
492 memcpy(fdb->addr.addr, addr, ETH_ALEN); 474 memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
493 fdb->dst = source; 475 fdb->dst = source;
494 fdb->vlan_id = vid; 476 fdb->key.vlan_id = vid;
495 fdb->is_local = is_local; 477 fdb->is_local = is_local;
496 fdb->is_static = is_static; 478 fdb->is_static = is_static;
497 fdb->added_by_user = 0; 479 fdb->added_by_user = 0;
498 fdb->added_by_external_learn = 0; 480 fdb->added_by_external_learn = 0;
499 fdb->offloaded = 0; 481 fdb->offloaded = 0;
500 fdb->updated = fdb->used = jiffies; 482 fdb->updated = fdb->used = jiffies;
501 hlist_add_head_rcu(&fdb->hlist, head); 483 if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
484 &fdb->rhnode,
485 br_fdb_rht_params)) {
486 kmem_cache_free(br_fdb_cache, fdb);
487 fdb = NULL;
488 } else {
489 hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
490 }
502 } 491 }
503 return fdb; 492 return fdb;
504} 493}
@@ -506,7 +495,6 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
506static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 495static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
507 const unsigned char *addr, u16 vid) 496 const unsigned char *addr, u16 vid)
508{ 497{
509 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
510 struct net_bridge_fdb_entry *fdb; 498 struct net_bridge_fdb_entry *fdb;
511 499
512 if (!is_valid_ether_addr(addr)) 500 if (!is_valid_ether_addr(addr))
@@ -524,7 +512,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
524 fdb_delete(br, fdb); 512 fdb_delete(br, fdb);
525 } 513 }
526 514
527 fdb = fdb_create(head, source, addr, vid, 1, 1); 515 fdb = fdb_create(br, source, addr, vid, 1, 1);
528 if (!fdb) 516 if (!fdb)
529 return -ENOMEM; 517 return -ENOMEM;
530 518
@@ -548,7 +536,6 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
548void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, 536void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
549 const unsigned char *addr, u16 vid, bool added_by_user) 537 const unsigned char *addr, u16 vid, bool added_by_user)
550{ 538{
551 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
552 struct net_bridge_fdb_entry *fdb; 539 struct net_bridge_fdb_entry *fdb;
553 bool fdb_modified = false; 540 bool fdb_modified = false;
554 541
@@ -561,7 +548,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
561 source->state == BR_STATE_FORWARDING)) 548 source->state == BR_STATE_FORWARDING))
562 return; 549 return;
563 550
564 fdb = fdb_find_rcu(head, addr, vid); 551 fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
565 if (likely(fdb)) { 552 if (likely(fdb)) {
566 /* attempt to update an entry for a local interface */ 553 /* attempt to update an entry for a local interface */
567 if (unlikely(fdb->is_local)) { 554 if (unlikely(fdb->is_local)) {
@@ -590,14 +577,13 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
590 } 577 }
591 } else { 578 } else {
592 spin_lock(&br->hash_lock); 579 spin_lock(&br->hash_lock);
593 if (likely(!fdb_find_rcu(head, addr, vid))) { 580 fdb = fdb_create(br, source, addr, vid, 0, 0);
594 fdb = fdb_create(head, source, addr, vid, 0, 0); 581 if (fdb) {
595 if (fdb) { 582 if (unlikely(added_by_user))
596 if (unlikely(added_by_user)) 583 fdb->added_by_user = 1;
597 fdb->added_by_user = 1; 584 trace_br_fdb_update(br, source, addr, vid,
598 trace_br_fdb_update(br, source, addr, vid, added_by_user); 585 added_by_user);
599 fdb_notify(br, fdb, RTM_NEWNEIGH); 586 fdb_notify(br, fdb, RTM_NEWNEIGH);
600 }
601 } 587 }
602 /* else we lose race and someone else inserts 588 /* else we lose race and someone else inserts
603 * it first, don't bother updating 589 * it first, don't bother updating
@@ -646,7 +632,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
646 if (fdb->added_by_external_learn) 632 if (fdb->added_by_external_learn)
647 ndm->ndm_flags |= NTF_EXT_LEARNED; 633 ndm->ndm_flags |= NTF_EXT_LEARNED;
648 634
649 if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) 635 if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
650 goto nla_put_failure; 636 goto nla_put_failure;
651 if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) 637 if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
652 goto nla_put_failure; 638 goto nla_put_failure;
@@ -657,7 +643,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
657 if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 643 if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
658 goto nla_put_failure; 644 goto nla_put_failure;
659 645
660 if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) 646 if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
647 &fdb->key.vlan_id))
661 goto nla_put_failure; 648 goto nla_put_failure;
662 649
663 nlmsg_end(skb, nlh); 650 nlmsg_end(skb, nlh);
@@ -711,54 +698,48 @@ int br_fdb_dump(struct sk_buff *skb,
711 int *idx) 698 int *idx)
712{ 699{
713 struct net_bridge *br = netdev_priv(dev); 700 struct net_bridge *br = netdev_priv(dev);
701 struct net_bridge_fdb_entry *f;
714 int err = 0; 702 int err = 0;
715 int i;
716 703
717 if (!(dev->priv_flags & IFF_EBRIDGE)) 704 if (!(dev->priv_flags & IFF_EBRIDGE))
718 goto out; 705 return err;
719 706
720 if (!filter_dev) { 707 if (!filter_dev) {
721 err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); 708 err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
722 if (err < 0) 709 if (err < 0)
723 goto out; 710 return err;
724 } 711 }
725 712
726 for (i = 0; i < BR_HASH_SIZE; i++) { 713 rcu_read_lock();
727 struct net_bridge_fdb_entry *f; 714 hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
728 715 if (*idx < cb->args[2])
729 hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { 716 goto skip;
730 717 if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) {
731 if (*idx < cb->args[2]) 718 if (filter_dev != dev)
732 goto skip; 719 goto skip;
733 720 /* !f->dst is a special case for bridge
734 if (filter_dev && 721 * It means the MAC belongs to the bridge
735 (!f->dst || f->dst->dev != filter_dev)) { 722 * Therefore need a little more filtering
736 if (filter_dev != dev) 723 * we only want to dump the !f->dst case
737 goto skip; 724 */
738 /* !f->dst is a special case for bridge 725 if (f->dst)
739 * It means the MAC belongs to the bridge
740 * Therefore need a little more filtering
741 * we only want to dump the !f->dst case
742 */
743 if (f->dst)
744 goto skip;
745 }
746 if (!filter_dev && f->dst)
747 goto skip; 726 goto skip;
748
749 err = fdb_fill_info(skb, br, f,
750 NETLINK_CB(cb->skb).portid,
751 cb->nlh->nlmsg_seq,
752 RTM_NEWNEIGH,
753 NLM_F_MULTI);
754 if (err < 0)
755 goto out;
756skip:
757 *idx += 1;
758 } 727 }
728 if (!filter_dev && f->dst)
729 goto skip;
730
731 err = fdb_fill_info(skb, br, f,
732 NETLINK_CB(cb->skb).portid,
733 cb->nlh->nlmsg_seq,
734 RTM_NEWNEIGH,
735 NLM_F_MULTI);
736 if (err < 0)
737 break;
738skip:
739 *idx += 1;
759 } 740 }
741 rcu_read_unlock();
760 742
761out:
762 return err; 743 return err;
763} 744}
764 745
@@ -766,7 +747,6 @@ out:
766static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, 747static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
767 const __u8 *addr, __u16 state, __u16 flags, __u16 vid) 748 const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
768{ 749{
769 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
770 struct net_bridge_fdb_entry *fdb; 750 struct net_bridge_fdb_entry *fdb;
771 bool modified = false; 751 bool modified = false;
772 752
@@ -787,7 +767,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
787 if (!(flags & NLM_F_CREATE)) 767 if (!(flags & NLM_F_CREATE))
788 return -ENOENT; 768 return -ENOENT;
789 769
790 fdb = fdb_create(head, source, addr, vid, 0, 0); 770 fdb = fdb_create(br, source, addr, vid, 0, 0);
791 if (!fdb) 771 if (!fdb)
792 return -ENOMEM; 772 return -ENOMEM;
793 773
@@ -1012,65 +992,60 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
1012 992
1013int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) 993int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
1014{ 994{
1015 struct net_bridge_fdb_entry *fdb, *tmp; 995 struct net_bridge_fdb_entry *f, *tmp;
1016 int i; 996 int err = 0;
1017 int err;
1018 997
1019 ASSERT_RTNL(); 998 ASSERT_RTNL();
1020 999
1021 for (i = 0; i < BR_HASH_SIZE; i++) { 1000 /* the key here is that static entries change only under rtnl */
1022 hlist_for_each_entry(fdb, &br->hash[i], hlist) { 1001 rcu_read_lock();
1023 /* We only care for static entries */ 1002 hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
1024 if (!fdb->is_static) 1003 /* We only care for static entries */
1025 continue; 1004 if (!f->is_static)
1026 1005 continue;
1027 err = dev_uc_add(p->dev, fdb->addr.addr); 1006 err = dev_uc_add(p->dev, f->key.addr.addr);
1028 if (err) 1007 if (err)
1029 goto rollback; 1008 goto rollback;
1030 }
1031 } 1009 }
1032 return 0; 1010done:
1011 rcu_read_unlock();
1033 1012
1034rollback: 1013 return err;
1035 for (i = 0; i < BR_HASH_SIZE; i++) {
1036 hlist_for_each_entry(tmp, &br->hash[i], hlist) {
1037 /* If we reached the fdb that failed, we can stop */
1038 if (tmp == fdb)
1039 break;
1040
1041 /* We only care for static entries */
1042 if (!tmp->is_static)
1043 continue;
1044 1014
1045 dev_uc_del(p->dev, tmp->addr.addr); 1015rollback:
1046 } 1016 hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
1017 /* We only care for static entries */
1018 if (!tmp->is_static)
1019 continue;
1020 if (tmp == f)
1021 break;
1022 dev_uc_del(p->dev, tmp->key.addr.addr);
1047 } 1023 }
1048 return err; 1024
1025 goto done;
1049} 1026}
1050 1027
1051void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) 1028void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
1052{ 1029{
1053 struct net_bridge_fdb_entry *fdb; 1030 struct net_bridge_fdb_entry *f;
1054 int i;
1055 1031
1056 ASSERT_RTNL(); 1032 ASSERT_RTNL();
1057 1033
1058 for (i = 0; i < BR_HASH_SIZE; i++) { 1034 rcu_read_lock();
1059 hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) { 1035 hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
1060 /* We only care for static entries */ 1036 /* We only care for static entries */
1061 if (!fdb->is_static) 1037 if (!f->is_static)
1062 continue; 1038 continue;
1063 1039
1064 dev_uc_del(p->dev, fdb->addr.addr); 1040 dev_uc_del(p->dev, f->key.addr.addr);
1065 }
1066 } 1041 }
1042 rcu_read_unlock();
1067} 1043}
1068 1044
1069int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, 1045int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
1070 const unsigned char *addr, u16 vid) 1046 const unsigned char *addr, u16 vid)
1071{ 1047{
1072 struct net_bridge_fdb_entry *fdb; 1048 struct net_bridge_fdb_entry *fdb;
1073 struct hlist_head *head;
1074 bool modified = false; 1049 bool modified = false;
1075 int err = 0; 1050 int err = 0;
1076 1051
@@ -1078,10 +1053,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
1078 1053
1079 spin_lock_bh(&br->hash_lock); 1054 spin_lock_bh(&br->hash_lock);
1080 1055
1081 head = &br->hash[br_mac_hash(addr, vid)];
1082 fdb = br_fdb_find(br, addr, vid); 1056 fdb = br_fdb_find(br, addr, vid);
1083 if (!fdb) { 1057 if (!fdb) {
1084 fdb = fdb_create(head, p, addr, vid, 0, 0); 1058 fdb = fdb_create(br, p, addr, vid, 0, 0);
1085 if (!fdb) { 1059 if (!fdb) {
1086 err = -ENOMEM; 1060 err = -ENOMEM;
1087 goto err_unlock; 1061 goto err_unlock;
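
br_fdb.c moves from a fixed 256-bucket array hashed with a random salt to a resizable rhashtable keyed on the {MAC, VLAN} pair, plus a separate fdb_list hlist for walking all entries. A minimal sketch of the rhashtable calls the new code relies on, with an illustrative item type:

    #include <linux/rhashtable.h>

    struct item {
            u32 key;
            struct rhash_head node;
    };

    static const struct rhashtable_params item_params = {
            .head_offset = offsetof(struct item, node),
            .key_offset  = offsetof(struct item, key),
            .key_len     = sizeof(u32),
            .automatic_shrinking = true,
    };

    /* Usage outline:
     *   rhashtable_init(&tbl, &item_params) once at setup;
     *   rhashtable_lookup_insert_fast(&tbl, &it->node, item_params) fails
     *     if the key already exists (how fdb_create detects insert races);
     *   rhashtable_lookup(&tbl, &key, item_params) under rcu_read_lock();
     *   rhashtable_remove_fast(&tbl, &it->node, item_params) on delete;
     *   rhashtable_destroy(&tbl) at teardown.
     */
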
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b0f4c734900b..6d9f48bd374a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -760,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
760 760
761void br_mdb_init(void) 761void br_mdb_init(void)
762{ 762{
763 rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); 763 rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
764 rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); 764 rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
765 rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); 765 rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
766} 766}
767 767
768void br_mdb_uninit(void) 768void br_mdb_uninit(void)
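
rtnl_register_module() differs from the older rtnl_register() in that it records the owning module, letting the rtnetlink core hold a reference while a handler runs, and it can fail. Its prototype, as declared in <net/rtnetlink.h> at the time of this merge:

    int rtnl_register_module(struct module *owner, int protocol, int msgtype,
                             rtnl_doit_func doit, rtnl_dumpit_func dumpit,
                             unsigned int flags);
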
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c2eea1b8737a..27f1d4f2114a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -991,7 +991,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
991 unsigned int i; 991 unsigned int i;
992 int ret; 992 int ret;
993 993
994 e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); 994 e = rcu_dereference(net->nf.hooks_bridge[hook]);
995 if (!e) 995 if (!e)
996 return okfn(net, sk, skb); 996 return okfn(net, sk, skb);
997 997
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 20cbb727df4d..8e2d7cfa4e16 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -78,7 +78,6 @@ void br_netfilter_rtable_init(struct net_bridge *br)
78 78
79 atomic_set(&rt->dst.__refcnt, 1); 79 atomic_set(&rt->dst.__refcnt, 1);
80 rt->dst.dev = br->dev; 80 rt->dst.dev = br->dev;
81 rt->dst.path = &rt->dst;
82 dst_init_metrics(&rt->dst, br_dst_default_metrics, true); 81 dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
83 rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; 82 rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
84 rt->dst.ops = &fake_dst_ops; 83 rt->dst.ops = &fake_dst_ops;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1312b8d20ec3..8e13a64d8c99 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -168,12 +168,17 @@ struct net_bridge_vlan_group {
168 u16 pvid; 168 u16 pvid;
169}; 169};
170 170
171struct net_bridge_fdb_key {
172 mac_addr addr;
173 u16 vlan_id;
174};
175
171struct net_bridge_fdb_entry { 176struct net_bridge_fdb_entry {
172 struct hlist_node hlist; 177 struct rhash_head rhnode;
173 struct net_bridge_port *dst; 178 struct net_bridge_port *dst;
174 179
175 mac_addr addr; 180 struct net_bridge_fdb_key key;
176 __u16 vlan_id; 181 struct hlist_node fdb_node;
177 unsigned char is_local:1, 182 unsigned char is_local:1,
178 is_static:1, 183 is_static:1,
179 added_by_user:1, 184 added_by_user:1,
@@ -315,7 +320,7 @@ struct net_bridge {
315 struct net_bridge_vlan_group __rcu *vlgrp; 320 struct net_bridge_vlan_group __rcu *vlgrp;
316#endif 321#endif
317 322
318 struct hlist_head hash[BR_HASH_SIZE]; 323 struct rhashtable fdb_hash_tbl;
319#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 324#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
320 union { 325 union {
321 struct rtable fake_rtable; 326 struct rtable fake_rtable;
@@ -405,6 +410,7 @@ struct net_bridge {
405 int offload_fwd_mark; 410 int offload_fwd_mark;
406#endif 411#endif
407 bool neigh_suppress_enabled; 412 bool neigh_suppress_enabled;
413 struct hlist_head fdb_list;
408}; 414};
409 415
410struct br_input_skb_cb { 416struct br_input_skb_cb {
@@ -515,6 +521,8 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
 /* br_fdb.c */
 int br_fdb_init(void);
 void br_fdb_fini(void);
+int br_fdb_hash_init(struct net_bridge *br);
+void br_fdb_hash_fini(struct net_bridge *br);
 void br_fdb_flush(struct net_bridge *br);
 void br_fdb_find_delete_local(struct net_bridge *br,
 			      const struct net_bridge_port *p,
@@ -752,7 +760,7 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 
 static inline bool br_multicast_is_router(struct net_bridge *br)
 {
-	return 0;
+	return false;
 }
 
 static inline bool br_multicast_querier_exists(struct net_bridge *br,
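The br_private.h hunks above convert the bridge FDB from a fixed-size open hash (hash[BR_HASH_SIZE]) to an rhashtable keyed by the new struct net_bridge_fdb_key, with fdb_list retained for stable iteration. A hedged sketch of an RCU lookup against such a table; br_fdb_rht_params is assumed to be the rhashtable_params describing the (MAC, VLAN) key layout, and this is not the bridge's verbatim helper:

static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
						 const unsigned char *addr,
						 u16 vid)
{
	struct net_bridge_fdb_key key;

	/* callers must be in an RCU read-side critical section */
	memcpy(key.addr.addr, addr, ETH_ALEN);
	key.vlan_id = vid;

	return rhashtable_lookup(tbl, &key, br_fdb_rht_params);
}

Because the whole struct net_bridge_fdb_key is the hash key, MAC and VLAN are compared in one lookup instead of hashing on the MAC and filtering on the VLAN afterwards.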
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 9700e0f3307b..ee775f4ff76c 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -121,13 +121,13 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 
 	switch (type) {
 	case RTM_DELNEIGH:
-		br_switchdev_fdb_call_notifiers(false, fdb->addr.addr,
-						fdb->vlan_id,
+		br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
+						fdb->key.vlan_id,
 						fdb->dst->dev);
 		break;
 	case RTM_NEWNEIGH:
-		br_switchdev_fdb_call_notifiers(true, fdb->addr.addr,
-						fdb->vlan_id,
+		br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
+						fdb->key.vlan_id,
 						fdb->dst->dev);
 		break;
 	}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 723f25eed8ea..b1be0dcfba6b 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -272,10 +272,7 @@ static ssize_t group_addr_show(struct device *d,
 			       struct device_attribute *attr, char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%x:%x:%x:%x:%x:%x\n",
-		       br->group_addr[0], br->group_addr[1],
-		       br->group_addr[2], br->group_addr[3],
-		       br->group_addr[4], br->group_addr[5]);
+	return sprintf(buf, "%pM\n", br->group_addr);
 }
 
 static ssize_t group_addr_store(struct device *d,
@@ -284,14 +281,11 @@ static ssize_t group_addr_store(struct device *d,
 {
 	struct net_bridge *br = to_bridge(d);
 	u8 new_addr[6];
-	int i;
 
 	if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
-		   &new_addr[0], &new_addr[1], &new_addr[2],
-		   &new_addr[3], &new_addr[4], &new_addr[5]) != 6)
+	if (!mac_pton(buf, new_addr))
 		return -EINVAL;
 
 	if (!is_link_local_ether_addr(new_addr))
@@ -306,8 +300,7 @@ static ssize_t group_addr_store(struct device *d,
 		return restart_syscall();
 
 	spin_lock_bh(&br->lock);
-	for (i = 0; i < 6; i++)
-		br->group_addr[i] = new_addr[i];
+	ether_addr_copy(br->group_addr, new_addr);
 	spin_unlock_bh(&br->lock);
 
 	br->group_addr_set = true;
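Both sysfs hunks replace open-coded MAC handling with stock helpers: the %pM printk extension for formatting, mac_pton() for parsing, and ether_addr_copy() for the copy. A small self-contained sketch of the round trip (parse_and_log is illustrative, not from the patch):

#include <linux/etherdevice.h>
#include <linux/kernel.h>

static int parse_and_log(const char *buf)
{
	u8 mac[ETH_ALEN];

	if (!mac_pton(buf, mac))		/* strict xx:xx:xx:xx:xx:xx parse */
		return -EINVAL;
	pr_info("group address is %pM\n", mac);	/* %pM formats a MAC address */
	return 0;
}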
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index e7ef1a1ef3a6..225d1668dfdd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,6 +4,7 @@
 #
 menuconfig NF_TABLES_BRIDGE
 	depends on BRIDGE && NETFILTER && NF_TABLES
+	select NETFILTER_FAMILY_BRIDGE
 	tristate "Ethernet Bridge nf_tables support"
 
 if NF_TABLES_BRIDGE
@@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
 	depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
+	select NETFILTER_FAMILY_BRIDGE
 	help
 	  ebtables is a general, extensible frame/packet identification
 	  framework. Say 'Y' or 'M' here if you want to do Ethernet
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 37817d25b63d..02c4b409d317 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2445,7 +2445,6 @@ static int __init ebtables_init(void)
 		return ret;
 	}
 
-	printk(KERN_INFO "Ebtables v2.0 registered\n");
 	return 0;
 }
 
@@ -2453,7 +2452,6 @@ static void __exit ebtables_fini(void)
 {
 	nf_unregister_sockopt(&ebt_sockopts);
 	xt_unregister_target(&ebt_standard_target);
-	printk(KERN_INFO "Ebtables v2.0 unregistered\n");
 }
 
 EXPORT_SYMBOL(ebt_register_table);
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 97afdc0744e6..5160cf614176 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -25,63 +25,23 @@ nft_do_chain_bridge(void *priv,
 {
 	struct nft_pktinfo pkt;
 
+	nft_set_pktinfo(&pkt, skb, state);
+
 	switch (eth_hdr(skb)->h_proto) {
 	case htons(ETH_P_IP):
-		nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+		nft_set_pktinfo_ipv4_validate(&pkt, skb);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+		nft_set_pktinfo_ipv6_validate(&pkt, skb);
 		break;
 	default:
-		nft_set_pktinfo_unspec(&pkt, skb, state);
+		nft_set_pktinfo_unspec(&pkt, skb);
 		break;
 	}
 
 	return nft_do_chain(&pkt, priv);
 }
 
-static struct nft_af_info nft_af_bridge __read_mostly = {
-	.family		= NFPROTO_BRIDGE,
-	.nhooks		= NF_BR_NUMHOOKS,
-	.owner		= THIS_MODULE,
-	.nops		= 1,
-	.hooks		= {
-		[NF_BR_PRE_ROUTING]	= nft_do_chain_bridge,
-		[NF_BR_LOCAL_IN]	= nft_do_chain_bridge,
-		[NF_BR_FORWARD]		= nft_do_chain_bridge,
-		[NF_BR_LOCAL_OUT]	= nft_do_chain_bridge,
-		[NF_BR_POST_ROUTING]	= nft_do_chain_bridge,
-	},
-};
-
-static int nf_tables_bridge_init_net(struct net *net)
-{
-	net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
-	if (net->nft.bridge == NULL)
-		return -ENOMEM;
-
-	memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
-
-	if (nft_register_afinfo(net, net->nft.bridge) < 0)
-		goto err;
-
-	return 0;
-err:
-	kfree(net->nft.bridge);
-	return -ENOMEM;
-}
-
-static void nf_tables_bridge_exit_net(struct net *net)
-{
-	nft_unregister_afinfo(net, net->nft.bridge);
-	kfree(net->nft.bridge);
-}
-
-static struct pernet_operations nf_tables_bridge_net_ops = {
-	.init = nf_tables_bridge_init_net,
-	.exit = nf_tables_bridge_exit_net,
-};
-
 static const struct nf_chain_type filter_bridge = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
@@ -92,75 +52,23 @@ static const struct nf_chain_type filter_bridge = {
 	  (1 << NF_BR_FORWARD) |
 	  (1 << NF_BR_LOCAL_OUT) |
 	  (1 << NF_BR_POST_ROUTING),
+	.hooks		= {
+		[NF_BR_PRE_ROUTING]	= nft_do_chain_bridge,
+		[NF_BR_LOCAL_IN]	= nft_do_chain_bridge,
+		[NF_BR_FORWARD]		= nft_do_chain_bridge,
+		[NF_BR_LOCAL_OUT]	= nft_do_chain_bridge,
+		[NF_BR_POST_ROUTING]	= nft_do_chain_bridge,
+	},
 };
-
-static void nf_br_saveroute(const struct sk_buff *skb,
-			    struct nf_queue_entry *entry)
-{
-}
-
-static int nf_br_reroute(struct net *net, struct sk_buff *skb,
-			 const struct nf_queue_entry *entry)
-{
-	return 0;
-}
-
-static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook,
-			      unsigned int dataoff, u_int8_t protocol)
-{
-	return 0;
-}
-
-static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
-				      unsigned int dataoff, unsigned int len,
-				      u_int8_t protocol)
-{
-	return 0;
-}
-
-static int nf_br_route(struct net *net, struct dst_entry **dst,
-		       struct flowi *fl, bool strict __always_unused)
-{
-	return 0;
-}
-
-static const struct nf_afinfo nf_br_afinfo = {
-	.family			= AF_BRIDGE,
-	.checksum		= nf_br_checksum,
-	.checksum_partial	= nf_br_checksum_partial,
-	.route			= nf_br_route,
-	.saveroute		= nf_br_saveroute,
-	.reroute		= nf_br_reroute,
-	.route_key_size		= 0,
-};
 
 static int __init nf_tables_bridge_init(void)
 {
-	int ret;
-
-	nf_register_afinfo(&nf_br_afinfo);
-	ret = nft_register_chain_type(&filter_bridge);
-	if (ret < 0)
-		goto err1;
-
-	ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
-	if (ret < 0)
-		goto err2;
-
-	return ret;
-
-err2:
-	nft_unregister_chain_type(&filter_bridge);
-err1:
-	nf_unregister_afinfo(&nf_br_afinfo);
-	return ret;
+	return nft_register_chain_type(&filter_bridge);
 }
 
 static void __exit nf_tables_bridge_exit(void)
 {
-	unregister_pernet_subsys(&nf_tables_bridge_net_ops);
 	nft_unregister_chain_type(&filter_bridge);
-	nf_unregister_afinfo(&nf_br_afinfo);
 }
 
 module_init(nf_tables_bridge_init);
@@ -168,4 +76,4 @@ module_exit(nf_tables_bridge_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
+MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
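With the per-family nft_af_info and its per-netns plumbing gone, a chain type now carries its hook functions directly and the module advertises only the "filter" chain it actually provides. Since AF_BRIDGE is 7 and MODULE_ALIAS_NFT_CHAIN() stringifies its arguments, the new alias should expand to roughly the following (a hedged reading of the macro, not text from the patch):

MODULE_ALIAS("nft-chain-7-filter");

which lets the nf_tables core autoload this module when user space creates a bridge-family filter chain, instead of loading it for any bridge-family operation.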
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 632d5a416d97..a6fb1b3bcad9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -924,7 +924,7 @@ static int caif_release(struct socket *sock)
 
 	caif_disconnect_client(sock_net(sk), &cf_sk->layer);
 	cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
-	wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
+	wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP);
 
 	sock_orphan(sk);
 	sk_stream_kill_queues(&cf_sk->sk);
@@ -934,11 +934,11 @@ static int caif_release(struct socket *sock)
 }
 
 /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static unsigned int caif_poll(struct file *file,
+static __poll_t caif_poll(struct file *file,
 			      struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	unsigned int mask;
+	__poll_t mask;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
@@ -946,23 +946,23 @@ static unsigned int caif_poll(struct file *file,
 
 	/* exceptional events? */
 	if (sk->sk_err)
-		mask |= POLLERR;
+		mask |= EPOLLERR;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		mask |= POLLHUP;
+		mask |= EPOLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= EPOLLRDHUP;
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sk->sk_shutdown & RCV_SHUTDOWN))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/*
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
 	if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 
 	return mask;
 }
@@ -1032,6 +1032,8 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	static struct proto prot = {.name = "PF_CAIF",
 		.owner = THIS_MODULE,
 		.obj_size = sizeof(struct caifsock),
+		.useroffset = offsetof(struct caifsock, conn_req.param),
+		.usersize = sizeof_field(struct caifsock, conn_req.param)
 	};
 
 	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
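The two fields added to the CAIF proto define a hardened-usercopy whitelist: only the conn_req.param window of each caifsock slab object may be copied to or from user space; copies touching any other part of the object trip the usercopy checks. The same pattern for a hypothetical protocol (my_sock and my_proto are illustrative, not from the patch):

struct my_sock {
	struct sock sk;
	u8 scratch[64];		/* the only region user space may touch */
	void *kernel_only;	/* must never cross the user boundary */
};

static struct proto my_proto = {
	.name		= "MY_PROTO",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct my_sock),
	.useroffset	= offsetof(struct my_sock, scratch),
	.usersize	= sizeof_field(struct my_sock, scratch),
};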
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 655ed7032150..a1e85f032108 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -352,15 +352,14 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 	u8 cmdrsp;
 	u8 cmd;
 	int ret = -1;
-	u16 tmp16;
 	u8 len;
 	u8 param[255];
-	u8 linkid;
+	u8 linkid = 0;
 	struct cfctrl *cfctrl = container_obj(layer);
 	struct cfctrl_request_info rsp, *req;
 
 
-	cfpkt_extr_head(pkt, &cmdrsp, 1);
+	cmdrsp = cfpkt_extr_head_u8(pkt);
 	cmd = cmdrsp & CFCTRL_CMD_MASK;
 	if (cmd != CFCTRL_CMD_LINK_ERR
 	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)
@@ -378,13 +377,12 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 		u8 physlinkid;
 		u8 prio;
 		u8 tmp;
-		u32 tmp32;
 		u8 *cp;
 		int i;
 		struct cfctrl_link_param linkparam;
 		memset(&linkparam, 0, sizeof(linkparam));
 
-		cfpkt_extr_head(pkt, &tmp, 1);
+		tmp = cfpkt_extr_head_u8(pkt);
 
 		serv = tmp & CFCTRL_SRV_MASK;
 		linkparam.linktype = serv;
@@ -392,13 +390,13 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 		servtype = tmp >> 4;
 		linkparam.chtype = servtype;
 
-		cfpkt_extr_head(pkt, &tmp, 1);
+		tmp = cfpkt_extr_head_u8(pkt);
 		physlinkid = tmp & 0x07;
 		prio = tmp >> 3;
 
 		linkparam.priority = prio;
 		linkparam.phyid = physlinkid;
-		cfpkt_extr_head(pkt, &endpoint, 1);
+		endpoint = cfpkt_extr_head_u8(pkt);
 		linkparam.endpoint = endpoint & 0x03;
 
 		switch (serv) {
@@ -407,45 +405,43 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 			if (CFCTRL_ERR_BIT & cmdrsp)
 				break;
 			/* Link ID */
-			cfpkt_extr_head(pkt, &linkid, 1);
+			linkid = cfpkt_extr_head_u8(pkt);
 			break;
 		case CFCTRL_SRV_VIDEO:
-			cfpkt_extr_head(pkt, &tmp, 1);
+			tmp = cfpkt_extr_head_u8(pkt);
 			linkparam.u.video.connid = tmp;
 			if (CFCTRL_ERR_BIT & cmdrsp)
 				break;
 			/* Link ID */
-			cfpkt_extr_head(pkt, &linkid, 1);
+			linkid = cfpkt_extr_head_u8(pkt);
 			break;
 
 		case CFCTRL_SRV_DATAGRAM:
-			cfpkt_extr_head(pkt, &tmp32, 4);
 			linkparam.u.datagram.connid =
-				le32_to_cpu(tmp32);
+				cfpkt_extr_head_u32(pkt);
 			if (CFCTRL_ERR_BIT & cmdrsp)
 				break;
 			/* Link ID */
-			cfpkt_extr_head(pkt, &linkid, 1);
+			linkid = cfpkt_extr_head_u8(pkt);
 			break;
 		case CFCTRL_SRV_RFM:
 			/* Construct a frame, convert
 			 * DatagramConnectionID
 			 * to network format long and copy it out...
 			 */
-			cfpkt_extr_head(pkt, &tmp32, 4);
 			linkparam.u.rfm.connid =
-				le32_to_cpu(tmp32);
+				cfpkt_extr_head_u32(pkt);
 			cp = (u8 *) linkparam.u.rfm.volume;
-			for (cfpkt_extr_head(pkt, &tmp, 1);
+			for (tmp = cfpkt_extr_head_u8(pkt);
 			     cfpkt_more(pkt) && tmp != '\0';
-			     cfpkt_extr_head(pkt, &tmp, 1))
+			     tmp = cfpkt_extr_head_u8(pkt))
 				*cp++ = tmp;
 			*cp = '\0';
 
 			if (CFCTRL_ERR_BIT & cmdrsp)
 				break;
 			/* Link ID */
-			cfpkt_extr_head(pkt, &linkid, 1);
+			linkid = cfpkt_extr_head_u8(pkt);
 
 			break;
 		case CFCTRL_SRV_UTIL:
@@ -454,13 +450,11 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 			 * to network format long and copy it out...
 			 */
 			/* Fifosize KB */
-			cfpkt_extr_head(pkt, &tmp16, 2);
 			linkparam.u.utility.fifosize_kb =
-				le16_to_cpu(tmp16);
+				cfpkt_extr_head_u16(pkt);
 			/* Fifosize bufs */
-			cfpkt_extr_head(pkt, &tmp16, 2);
 			linkparam.u.utility.fifosize_bufs =
-				le16_to_cpu(tmp16);
+				cfpkt_extr_head_u16(pkt);
 			/* name */
 			cp = (u8 *) linkparam.u.utility.name;
 			caif_assert(sizeof(linkparam.u.utility.name)
@@ -468,24 +462,24 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 			for (i = 0;
 			     i < UTILITY_NAME_LENGTH
 			     && cfpkt_more(pkt); i++) {
-				cfpkt_extr_head(pkt, &tmp, 1);
+				tmp = cfpkt_extr_head_u8(pkt);
 				*cp++ = tmp;
 			}
 			/* Length */
-			cfpkt_extr_head(pkt, &len, 1);
+			len = cfpkt_extr_head_u8(pkt);
 			linkparam.u.utility.paramlen = len;
 			/* Param Data */
 			cp = linkparam.u.utility.params;
 			while (cfpkt_more(pkt) && len--) {
-				cfpkt_extr_head(pkt, &tmp, 1);
+				tmp = cfpkt_extr_head_u8(pkt);
 				*cp++ = tmp;
 			}
 			if (CFCTRL_ERR_BIT & cmdrsp)
 				break;
 			/* Link ID */
-			cfpkt_extr_head(pkt, &linkid, 1);
+			linkid = cfpkt_extr_head_u8(pkt);
 			/* Length */
-			cfpkt_extr_head(pkt, &len, 1);
+			len = cfpkt_extr_head_u8(pkt);
 			/* Param Data */
 			cfpkt_extr_head(pkt, &param, len);
 			break;
@@ -522,7 +516,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 		}
 		break;
 	case CFCTRL_CMD_LINK_DESTROY:
-		cfpkt_extr_head(pkt, &linkid, 1);
+		linkid = cfpkt_extr_head_u8(pkt);
 		cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
 		break;
 	case CFCTRL_CMD_LINK_ERR:
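The cfctrl conversion replaces each cfpkt_extr_head(pkt, &tmp, n) plus explicit le*_to_cpu() pair with a typed helper that returns the host-order value directly, which is what makes the tmp16/tmp32 temporaries removable and lets linkid be initialized once. A hedged sketch of how such a helper can be built from the generic primitive; the real definitions live in the caif headers and may differ in detail:

static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt)
{
	__le16 tmp;

	cfpkt_extr_head(pkt, &tmp, 2);	/* pull two bytes off the head */
	return le16_to_cpu(tmp);	/* byte-swap once, centrally */
}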
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 71b6ab240dea..38c2b7a890dd 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -8,7 +8,6 @@
 
 #include <linux/string.h>
 #include <linux/skbuff.h>
-#include <linux/hardirq.h>
 #include <linux/export.h>
 #include <net/caif/cfpkt.h>
 
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 922ac1d605b3..53ecda10b790 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -8,7 +8,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
 
 #include <linux/fs.h>
-#include <linux/hardirq.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a15c0e0d1fc7..a4399be54ff4 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -11,7 +11,7 @@ menuconfig CAN
 	  1991, mainly for automotive, but now widely used in marine
 	  (NMEA2000), industrial, and medical applications.
 	  More information on the CAN network protocol family PF_CAN
-	  is contained in <Documentation/networking/can.txt>.
+	  is contained in <Documentation/networking/can.rst>.
 
 	  If you want CAN support you should say Y here and also to the
 	  specific driver for your controller(s) below.
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 4d7f988a3130..6da324550eec 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -321,13 +321,13 @@ EXPORT_SYMBOL(can_send);
  * af_can rx path
  */
 
-static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net,
+static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net,
 						struct net_device *dev)
 {
 	if (!dev)
 		return net->can.can_rx_alldev_list;
 	else
-		return (struct dev_rcv_lists *)dev->ml_priv;
+		return (struct can_dev_rcv_lists *)dev->ml_priv;
 }
 
 /**
@@ -381,7 +381,7 @@ static unsigned int effhash(canid_t can_id)
  * Reduced can_id to have a preprocessed filter compare value.
  */
 static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
-					struct dev_rcv_lists *d)
+					struct can_dev_rcv_lists *d)
 {
 	canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
 
@@ -464,7 +464,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 {
 	struct receiver *r;
 	struct hlist_head *rl;
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 	struct s_pstats *can_pstats = net->can.can_pstats;
 	int err = 0;
 
@@ -542,7 +542,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 	struct receiver *r = NULL;
 	struct hlist_head *rl;
 	struct s_pstats *can_pstats = net->can.can_pstats;
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 
 	if (dev && dev->type != ARPHRD_CAN)
 		return;
@@ -615,7 +615,7 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r)
 	r->matches++;
 }
 
-static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
+static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
 {
 	struct receiver *r;
 	int matches = 0;
@@ -682,7 +682,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 
 static void can_receive(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 	struct net *net = dev_net(dev);
 	struct s_stats *can_stats = net->can.can_stats;
 	int matches;
@@ -821,7 +821,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 			void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 
 	if (dev->type != ARPHRD_CAN)
 		return NOTIFY_DONE;
@@ -866,7 +866,7 @@ static int can_pernet_init(struct net *net)
 {
 	spin_lock_init(&net->can.can_rcvlists_lock);
 	net->can.can_rx_alldev_list =
-		kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
+		kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL);
 	if (!net->can.can_rx_alldev_list)
 		goto out;
 	net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
@@ -912,7 +912,7 @@ static void can_pernet_exit(struct net *net)
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-			struct dev_rcv_lists *d = dev->ml_priv;
+			struct can_dev_rcv_lists *d = dev->ml_priv;
 
 			BUG_ON(d->entries);
 			kfree(d);
diff --git a/net/can/af_can.h b/net/can/af_can.h
index eca6463c6213..9cb3719632bd 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -67,7 +67,7 @@ struct receiver {
 enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
 
 /* per device receive filters linked at dev->ml_priv */
-struct dev_rcv_lists {
+struct can_dev_rcv_lists {
 	struct hlist_head rx[RX_MAX];
 	struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
 	struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 13690334efa3..ac5e5e34fee3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -246,7 +246,6 @@ static int bcm_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations bcm_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= bcm_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/net/can/gw.c b/net/can/gw.c
index 73a02af4b5d7..398dd0395ad9 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1014,6 +1014,8 @@ static struct pernet_operations cangw_pernet_ops = {
 
 static __init int cgw_module_init(void)
 {
+	int ret;
+
 	/* sanitize given module parameter */
 	max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS);
 
@@ -1031,15 +1033,19 @@ static __init int cgw_module_init(void)
 	notifier.notifier_call = cgw_notifier;
 	register_netdevice_notifier(&notifier);
 
-	if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
+	ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
+				   NULL, cgw_dump_jobs, 0);
+	if (ret) {
 		unregister_netdevice_notifier(&notifier);
 		kmem_cache_destroy(cgw_cache);
 		return -ENOBUFS;
 	}
 
-	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
-	__rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
+	/* Only the first call to rtnl_register_module can fail */
+	rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+			     cgw_create_job, NULL, 0);
+	rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+			     cgw_remove_job, NULL, 0);
 
 	return 0;
 }
diff --git a/net/can/proc.c b/net/can/proc.c
index 0c59f876fe6f..fdf704e9bb8c 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -276,7 +276,6 @@ static int can_stats_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations can_stats_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= can_stats_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -310,7 +309,6 @@ static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations can_reset_stats_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= can_reset_stats_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -329,7 +327,6 @@ static int can_version_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations can_version_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= can_version_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -338,7 +335,7 @@ static const struct file_operations can_version_proc_fops = {
 
 static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
 					     struct net_device *dev,
-					     struct dev_rcv_lists *d)
+					     struct can_dev_rcv_lists *d)
 {
 	if (!hlist_empty(&d->rx[idx])) {
 		can_print_recv_banner(m);
@@ -353,7 +350,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
 	/* double cast to prevent GCC warning */
 	int idx = (int)(long)PDE_DATA(m->file->f_inode);
 	struct net_device *dev;
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 	struct net *net = m->private;
 
 	seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
@@ -382,7 +379,6 @@ static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations can_rcvlist_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= can_rcvlist_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -417,7 +413,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m,
 static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
 {
 	struct net_device *dev;
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 	struct net *net = m->private;
 
 	/* RX_SFF */
@@ -450,7 +446,6 @@ static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations can_rcvlist_sff_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= can_rcvlist_sff_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -461,7 +456,7 @@ static const struct file_operations can_rcvlist_sff_proc_fops = {
 static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
 {
 	struct net_device *dev;
-	struct dev_rcv_lists *d;
+	struct can_dev_rcv_lists *d;
 	struct net *net = m->private;
 
 	/* RX_EFF */
@@ -494,7 +489,6 @@ static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations can_rcvlist_eff_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= can_rcvlist_eff_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/net/can/raw.c b/net/can/raw.c
index 864c80dbdb72..f2ecc43376a1 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -401,6 +401,8 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
 	if (len < sizeof(*addr))
 		return -EINVAL;
+	if (addr->can_family != AF_CAN)
+		return -EINVAL;
 
 	lock_sock(sk);
 
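The two added lines make raw_bind() reject sockaddrs of the wrong family instead of silently accepting them. Length-then-family validation is the usual shape of a protocol bind() handler; a sketch of the pattern (my_bind is illustrative, not from the patch):

#include <linux/can.h>
#include <linux/net.h>

static int my_bind(struct socket *sock, struct sockaddr *uaddr, int len)
{
	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;

	if (len < sizeof(*addr))
		return -EINVAL;		/* too short to be a sockaddr_can */
	if (addr->can_family != AF_CAN)
		return -EINVAL;		/* wrong address family */
	/* ... proceed with the real bind ... */
	return 0;
}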
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 5c036d2f401e..1e492ef2a33d 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -421,6 +421,10 @@ ceph_parse_options(char *options, const char *dev_name,
 			opt->name = kstrndup(argstr[0].from,
 					     argstr[0].to-argstr[0].from,
 					     GFP_KERNEL);
+			if (!opt->name) {
+				err = -ENOMEM;
+				goto out;
+			}
 			break;
 		case Opt_secret:
 			opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
diff --git a/net/core/Makefile b/net/core/Makefile
index 1fd0a9c88b1b..6dbbba8c57ae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
-			fib_notifier.o
+			fib_notifier.o xdp.o
 
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 522873ed120b..9938952c5c78 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -72,12 +72,10 @@ static inline int connection_based(struct sock *sk)
 static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
 				  void *key)
 {
-	unsigned long bits = (unsigned long)key;
-
 	/*
 	 * Avoid a wakeup if event not interesting for us
 	 */
-	if (bits && !(bits & (POLLIN | POLLERR)))
+	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, key);
 }
@@ -833,33 +831,33 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
  * and you use a different write policy from sock_writeable()
  * then please supply your own write_space callback.
  */
-unsigned int datagram_poll(struct file *file, struct socket *sock,
+__poll_t datagram_poll(struct file *file, struct socket *sock,
 			   poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	unsigned int mask;
+	__poll_t mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR |
-			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+		mask |= EPOLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		mask |= POLLHUP;
+		mask |= EPOLLHUP;
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
 	if (connection_based(sk)) {
 		if (sk->sk_state == TCP_CLOSE)
-			mask |= POLLHUP;
+			mask |= EPOLLHUP;
 		/* connection hasn't started yet? */
 		if (sk->sk_state == TCP_SYN_SENT)
 			return mask;
@@ -867,7 +865,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 
 	/* writable? */
 	if (sock_writeable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 	else
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
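datagram_poll() is part of the tree-wide conversion to the __poll_t type and the EPOLL* constants, which lets sparse flag code mixing poll masks with plain integers (key_to_poll() does the checked conversion in wait callbacks, as seen above). The post-conversion shape of a minimal poll handler looks roughly like this (my_poll is illustrative):

static __poll_t my_poll(struct file *file, struct socket *sock,
			poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;	/* readable */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM;	/* writable */
	return mask;
}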
diff --git a/net/core/dev.c b/net/core/dev.c
index 613fb4066be7..dda9d7b9a840 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1554,6 +1554,23 @@ void dev_disable_lro(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_disable_lro);
 
+/**
+ *	dev_disable_gro_hw - disable HW Generic Receive Offload on a device
+ *	@dev: device
+ *
+ *	Disable HW Generic Receive Offload (GRO_HW) on a net device.  Must be
+ *	called under RTNL.  This is needed if Generic XDP is installed on
+ *	the device.
+ */
+static void dev_disable_gro_hw(struct net_device *dev)
+{
+	dev->wanted_features &= ~NETIF_F_GRO_HW;
+	netdev_update_features(dev);
+
+	if (unlikely(dev->features & NETIF_F_GRO_HW))
+		netdev_WARN(dev, "failed to disable GRO_HW!\n");
+}
+
 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
 				   struct net_device *dev)
 {
@@ -1677,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 /**
  *	call_netdevice_notifiers_info - call all network notifier blocks
  *	@val: value passed unmodified to notifier function
- *	@dev: net_device pointer passed unmodified to notifier function
  *	@info: notifier information data
  *
  *	Call all network notifier blocks.  Parameters and return value
@@ -2815,7 +2831,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 
 	segs = skb_mac_gso_segment(skb, features);
 
-	if (unlikely(skb_needs_check(skb, tx_path)))
+	if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
 		skb_warn_bad_offload(skb);
 
 	return segs;
@@ -3054,7 +3070,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_csum_hwoffload_help);
 
-static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
 {
 	netdev_features_t features;
 
@@ -3078,9 +3094,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	    __skb_linearize(skb))
 		goto out_kfree_skb;
 
-	if (validate_xmit_xfrm(skb, features))
-		goto out_kfree_skb;
-
 	/* If packet is not checksummed and device does not
 	 * support checksumming for this protocol, complete
 	 * checksumming here.
@@ -3097,6 +3110,8 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 		}
 	}
 
+	skb = validate_xmit_xfrm(skb, features, again);
+
 	return skb;
 
 out_kfree_skb:
@@ -3106,7 +3121,7 @@ out_null:
 	return NULL;
 }
 
-struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
 {
 	struct sk_buff *next, *head = NULL, *tail;
 
@@ -3117,7 +3132,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
 		/* in case skb wont be segmented, point to itself */
 		skb->prev = skb;
 
-		skb = validate_xmit_skb(skb, dev);
+		skb = validate_xmit_skb(skb, dev, again);
 		if (!skb)
 			continue;
 
@@ -3185,6 +3200,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	int rc;
 
 	qdisc_calculate_pkt_len(skb, q);
+
+	if (q->flags & TCQ_F_NOLOCK) {
+		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+			__qdisc_drop(skb, &to_free);
+			rc = NET_XMIT_DROP;
+		} else {
+			rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+			__qdisc_run(q);
+		}
+
+		if (unlikely(to_free))
+			kfree_skb_list(to_free);
+		return rc;
+	}
+
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
 	 * separate lock before trying to get qdisc main lock.
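The new branch handles qdiscs that set TCQ_F_NOLOCK: enqueue and __qdisc_run() proceed without taking the root lock, relying on the qdisc's own internal synchronization. Distilled, the caller-side pattern introduced here and again in net_tx_action() below is (run_one is an illustrative name):

static void run_one(struct Qdisc *q)
{
	spinlock_t *root_lock = NULL;

	if (!(q->flags & TCQ_F_NOLOCK)) {	/* legacy qdiscs still need it */
		root_lock = qdisc_lock(q);
		spin_lock(root_lock);
	}
	qdisc_run(q);		/* may run fully unlocked for NOLOCK qdiscs */
	if (root_lock)
		spin_unlock(root_lock);
}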
@@ -3215,9 +3245,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 			contended = false;
 		}
 		__qdisc_run(q);
-	} else
-		qdisc_run_end(q);
+	}
 
+	qdisc_run_end(q);
 	rc = NET_XMIT_SUCCESS;
 	} else {
 		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
@@ -3227,6 +3257,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				contended = false;
 			}
 			__qdisc_run(q);
+			qdisc_run_end(q);
 		}
 	}
 	spin_unlock(root_lock);
@@ -3399,8 +3430,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 		else
 			queue_index = __netdev_pick_tx(dev, skb);
 
-		if (!accel_priv)
-			queue_index = netdev_cap_txqueue(dev, queue_index);
+		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
@@ -3439,6 +3469,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	struct netdev_queue *txq;
 	struct Qdisc *q;
 	int rc = -ENOMEM;
+	bool again = false;
 
 	skb_reset_mac_header(skb);
 
@@ -3500,7 +3531,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 			    XMIT_RECURSION_LIMIT))
 			goto recursion_alert;
 
-		skb = validate_xmit_skb(skb, dev);
+		skb = validate_xmit_skb(skb, dev, &again);
 		if (!skb)
 			goto out;
 
@@ -3896,9 +3927,33 @@ drop:
 	return NET_RX_DROP;
 }
 
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct netdev_rx_queue *rxqueue;
+
+	rxqueue = dev->_rx;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+
+		if (unlikely(index >= dev->real_num_rx_queues)) {
+			WARN_ONCE(dev->real_num_rx_queues > 1,
+				  "%s received packet on queue %u, but number "
+				  "of RX queues is %u\n",
+				  dev->name, index, dev->real_num_rx_queues);
+
+			return rxqueue; /* Return first rxqueue */
+		}
+		rxqueue += index;
+	}
+	return rxqueue;
+}
+
 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 				     struct bpf_prog *xdp_prog)
 {
+	struct netdev_rx_queue *rxqueue;
 	u32 metalen, act = XDP_DROP;
 	struct xdp_buff xdp;
 	void *orig_data;
@@ -3942,6 +3997,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp.data_hard_start = skb->data - skb_headroom(skb);
 	orig_data = xdp.data;
 
+	rxqueue = netif_get_rxqueue(skb);
+	xdp.rxq = &rxqueue->xdp_rxq;
+
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
 	off = xdp.data - orig_data;
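With xdp.rxq now pointing at the receive queue's xdp_rxq_info, generic XDP exposes the same queue metadata as driver XDP; programs observe it through struct xdp_md, whose ingress_ifindex and rx_queue_index fields are backed by xdp_buff->rxq. A hedged sketch of a BPF-side consumer; the SEC macro is defined inline so the fragment stands alone:

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

SEC("xdp")
int drop_on_queue0(struct xdp_md *ctx)
{
	/* rx_queue_index comes from xdp_buff->rxq, set just above */
	if (ctx->rx_queue_index == 0)
		return XDP_DROP;	/* drop traffic arriving on queue 0 */
	return XDP_PASS;		/* pass everything else */
}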
@@ -4166,21 +4224,26 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 
 		while (head) {
 			struct Qdisc *q = head;
-			spinlock_t *root_lock;
+			spinlock_t *root_lock = NULL;
 
 			head = head->next_sched;
 
-			root_lock = qdisc_lock(q);
-			spin_lock(root_lock);
+			if (!(q->flags & TCQ_F_NOLOCK)) {
+				root_lock = qdisc_lock(q);
+				spin_lock(root_lock);
+			}
 			/* We need to make sure head->next_sched is read
 			 * before clearing __QDISC_STATE_SCHED
 			 */
 			smp_mb__before_atomic();
 			clear_bit(__QDISC_STATE_SCHED, &q->state);
 			qdisc_run(q);
-			spin_unlock(root_lock);
+			if (root_lock)
+				spin_unlock(root_lock);
 		}
 	}
+
+	xfrm_dev_backlog(sd);
 }
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
@@ -4568,6 +4631,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 		} else if (new && !old) {
 			static_key_slow_inc(&generic_xdp_needed);
 			dev_disable_lro(dev);
+			dev_disable_gro_hw(dev);
 		}
 		break;
 
@@ -6371,6 +6435,7 @@ rollback:
  * netdev_upper_dev_link - Add a link to the upper device
  * @dev: device
  * @upper_dev: new upper device
+ * @extack: netlink extended ack
  *
  * Adds a link to device which is upper to this one. The caller must hold
  * the RTNL lock. On a failure a negative errno code is returned.
@@ -6392,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
  * @upper_dev: new upper device
  * @upper_priv: upper device private
  * @upper_info: upper info to be passed down via notifier
+ * @extack: netlink extended ack
  *
  * Adds a link to device which is upper to this one. In this case, only
  * one master upper device can be linked, although other non-master devices
@@ -6982,6 +7048,35 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(dev_set_mtu);
 
 /**
+ *	dev_change_tx_queue_len - Change TX queue length of a netdevice
+ *	@dev: device
+ *	@new_len: new tx queue length
+ */
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
+{
+	unsigned int orig_len = dev->tx_queue_len;
+	int res;
+
+	if (new_len != (unsigned int)new_len)
+		return -ERANGE;
+
+	if (new_len != orig_len) {
+		dev->tx_queue_len = new_len;
+		res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+		res = notifier_to_errno(res);
+		if (res) {
+			netdev_err(dev,
+				   "refused to change device tx_queue_len\n");
+			dev->tx_queue_len = orig_len;
+			return res;
+		}
+		return dev_qdisc_change_tx_queue_len(dev);
+	}
+
+	return 0;
+}
+
+/**
  *	dev_set_group - Change group this device belongs to
  *	@dev: device
  *	@new_group: group this device should belong to
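Callers of the new dev_change_tx_queue_len() are expected to hold RTNL, and the new length is only kept if every NETDEV_CHANGE_TX_QUEUE_LEN notifier accepts it; otherwise the old value is restored. A sketch of a caller under that assumption (set_txqlen is an illustrative name):

static int set_txqlen(struct net_device *dev, unsigned long len)
{
	int err;

	ASSERT_RTNL();			/* notifier chain requires RTNL */
	err = dev_change_tx_queue_len(dev, len);
	if (err)
		netdev_warn(dev, "tx_queue_len rejected: %d\n", err);
	return err;
}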
@@ -7096,17 +7191,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
+void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+		     struct netdev_bpf *xdp)
 {
-	struct netdev_bpf xdp;
-
-	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = XDP_QUERY_PROG;
+	memset(xdp, 0, sizeof(*xdp));
+	xdp->command = XDP_QUERY_PROG;
 
 	/* Query must always succeed. */
-	WARN_ON(bpf_op(dev, &xdp) < 0);
-	if (prog_id)
-		*prog_id = xdp.prog_id;
+	WARN_ON(bpf_op(dev, xdp) < 0);
+}
+
+static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+{
+	struct netdev_bpf xdp;
+
+	__dev_xdp_query(dev, bpf_op, &xdp);
 
 	return xdp.prog_attached;
 }
@@ -7129,6 +7228,27 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
 	return bpf_op(dev, &xdp);
 }
 
+static void dev_xdp_uninstall(struct net_device *dev)
+{
+	struct netdev_bpf xdp;
+	bpf_op_t ndo_bpf;
+
+	/* Remove generic XDP */
+	WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+
+	/* Remove from the driver */
+	ndo_bpf = dev->netdev_ops->ndo_bpf;
+	if (!ndo_bpf)
+		return;
+
+	__dev_xdp_query(dev, ndo_bpf, &xdp);
+	if (xdp.prog_attached == XDP_ATTACHED_NONE)
+		return;
+
+	/* Program removal should always succeed */
+	WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+}
+
 /**
  * dev_change_xdp_fd - set or clear a bpf program for a device rx path
  * @dev: device
@@ -7157,10 +7277,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		bpf_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
+		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
 			return -EEXIST;
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_attached(dev, bpf_op, NULL))
+		    __dev_xdp_attached(dev, bpf_op))
 			return -EBUSY;
 
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@ -7259,6 +7379,7 @@ static void rollback_registered_many(struct list_head *head)
 	/* Shutdown queueing discipline. */
 	dev_shutdown(dev);
 
+	dev_xdp_uninstall(dev);
 
 	/* Notify protocols, that we are about to destroy
 	 * this device. They should clean all the things.
@@ -7268,7 +7389,7 @@ static void rollback_registered_many(struct list_head *head)
 	if (!dev->rtnl_link_ops ||
 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 		skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
-					     GFP_KERNEL, NULL);
+					     GFP_KERNEL, NULL, 0);
 
 	/*
 	 * Flush the unicast and multicast chains
@@ -7402,6 +7523,18 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
7402 features &= ~dev->gso_partial_features; 7523 features &= ~dev->gso_partial_features;
7403 } 7524 }
7404 7525
7526 if (!(features & NETIF_F_RXCSUM)) {
7527 /* NETIF_F_GRO_HW implies doing RXCSUM since every packet
7528 * successfully merged by hardware must also have the
7529 * checksum verified by hardware. If the user does not
7530 * want to enable RXCSUM, logically, we should disable GRO_HW.
7531 */
7532 if (features & NETIF_F_GRO_HW) {
7533 netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
7534 features &= ~NETIF_F_GRO_HW;
7535 }
7536 }
7537
7405 return features; 7538 return features;
7406} 7539}
7407 7540
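The fixup above encodes a one-way feature dependency: hardware GRO only makes sense when hardware checksum verification stays on, so dropping RXCSUM silently drops GRO_HW as well. The driver side of that contract, as a hedged probe-time sketch (the foo driver is hypothetical; a real driver advertises whatever its hardware supports):

	/* illustrative snippet, not from this patch: expose both bits so
	 * users can toggle them, and let netdev_fix_features() enforce
	 * the GRO_HW -> RXCSUM dependency on every feature change.
	 */
	dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_GRO_HW;
	dev->features    |= NETIF_F_RXCSUM | NETIF_F_GRO_HW;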
@@ -7535,12 +7668,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
 	unsigned int i, count = dev->num_rx_queues;
 	struct netdev_rx_queue *rx;
 	size_t sz = count * sizeof(*rx);
+	int err = 0;
 
 	BUG_ON(count < 1);
 
@@ -7550,11 +7683,38 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	dev->_rx = rx;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
 		rx[i].dev = dev;
+
+		/* XDP RX-queue setup */
+		err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+		if (err < 0)
+			goto err_rxq_info;
+	}
 	return 0;
+
+err_rxq_info:
+	/* Rollback successful reg's and free other resources */
+	while (i--)
+		xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+	kvfree(dev->_rx);
+	dev->_rx = NULL;
+	return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+	unsigned int i, count = dev->num_rx_queues;
+
+	/* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+	if (!dev->_rx)
+		return;
+
+	for (i = 0; i < count; i++)
+		xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
+
+	kvfree(dev->_rx);
 }
-#endif
 
 static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue, void *_unused)
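Each core RX queue now carries a registered xdp_rxq_info for its whole lifetime, and dev->_rx always exists (the CONFIG_SYSFS guard disappears below). Drivers with private ring structures follow the same register/rollback pattern; a hedged sketch, where foo_rx_ring and foo_alloc_ring_memory are hypothetical:

	static int foo_setup_rx_ring(struct net_device *dev,
				     struct foo_rx_ring *ring, u32 idx)
	{
		int err;

		/* same API the core uses above */
		err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, idx);
		if (err)
			return err;

		err = foo_alloc_ring_memory(ring);	/* hypothetical */
		if (err)
			xdp_rxq_info_unreg(&ring->xdp_rxq);
		return err;
	}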
@@ -8115,12 +8275,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-#ifdef CONFIG_SYSFS
 	if (rxqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
 		return NULL;
 	}
-#endif
 
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
@@ -8177,12 +8335,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (netif_alloc_netdev_queues(dev))
 		goto free_all;
 
-#ifdef CONFIG_SYSFS
 	dev->num_rx_queues = rxqs;
 	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
 		goto free_all;
-#endif
 
 	strcpy(dev->name, name);
 	dev->name_assign_type = name_assign_type;
@@ -8218,13 +8374,10 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
 void free_netdev(struct net_device *dev)
 {
 	struct napi_struct *p, *n;
-	struct bpf_prog *prog;
 
 	might_sleep();
 	netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
-	kvfree(dev->_rx);
-#endif
+	netif_free_rx_queues(dev);
 
 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
 
@@ -8237,12 +8390,6 @@ void free_netdev(struct net_device *dev)
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
 
-	prog = rcu_dereference_protected(dev->xdp_prog, 1);
-	if (prog) {
-		bpf_prog_put(prog);
-		static_key_slow_dec(&generic_xdp_needed);
-	}
-
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		netdev_freemem(dev);
@@ -8355,7 +8502,7 @@ EXPORT_SYMBOL(unregister_netdev);
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-	int err, new_nsid;
+	int err, new_nsid, new_ifindex;
 
 	ASSERT_RTNL();
 
@@ -8411,11 +8558,16 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-	if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
-		new_nsid = peernet2id_alloc(dev_net(dev), net);
+
+	new_nsid = peernet2id_alloc(dev_net(dev), net);
+	/* If there is an ifindex conflict assign a new one */
+	if (__dev_get_by_index(net, dev->ifindex))
+		new_ifindex = dev_new_index(net);
 	else
-		new_nsid = peernet2id(dev_net(dev), net);
-	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
+		new_ifindex = dev->ifindex;
+
+	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
+			    new_ifindex);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -8429,10 +8581,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
-
-	/* If there is an ifindex conflict assign a new one */
-	if (__dev_get_by_index(net, dev->ifindex))
-		dev->ifindex = dev_new_index(net);
+	dev->ifindex = new_ifindex;
 
 	/* Send a netdev-add uevent to the new namespace */
 	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
@@ -8830,6 +8979,9 @@ static int __init net_dev_init(void)
 
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
+#ifdef CONFIG_XFRM_OFFLOAD
+		skb_queue_head_init(&sd->xfrm_backlog);
+#endif
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
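The namespace-move reordering above resolves both the peer nsid and the ifindex the device will have on the other side before the RTM_DELLINK notification is built, so listeners in the old namespace can find the device again. A rough userspace sketch, assuming the IFLA_NEW_NETNSID/IFLA_NEW_IFINDEX attributes that rtmsg_ifinfo_newnet() is extended to carry in this series:

	#include <stdio.h>
	#include <linux/rtnetlink.h>

	static void handle_dellink(struct nlmsghdr *nlh)
	{
		struct ifinfomsg *ifi = NLMSG_DATA(nlh);
		struct rtattr *rta = IFLA_RTA(ifi);
		int len = IFLA_PAYLOAD(nlh);

		for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
			if (rta->rta_type == IFLA_NEW_NETNSID)
				printf("moved to nsid %d\n",
				       *(int *)RTA_DATA(rta));
			else if (rta->rta_type == IFLA_NEW_IFINDEX)
				printf("ifindex there: %d\n",
				       *(int *)RTA_DATA(rta));
		}
	}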
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7e690d0ccd05..0ab1af04296c 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -18,26 +18,10 @@
  *	match. --pb
  */
 
-static int dev_ifname(struct net *net, struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq *ifr)
 {
-	struct ifreq ifr;
-	int error;
-
-	/*
-	 *	Fetch the caller's info block.
-	 */
-
-	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
-		return -EFAULT;
-	ifr.ifr_name[IFNAMSIZ-1] = 0;
-
-	error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
-	if (error)
-		return error;
-
-	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
-		return -EFAULT;
-	return 0;
+	ifr->ifr_name[IFNAMSIZ-1] = 0;
+	return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
 }
 
 static gifconf_func_t *gifconf_list[NPROTO];
@@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf);
  *	Thus we will need a 'compatibility mode'.
  */
 
-static int dev_ifconf(struct net *net, char __user *arg)
+int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
 {
-	struct ifconf ifc;
 	struct net_device *dev;
 	char __user *pos;
 	int len;
@@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg)
 	 *	Fetch the caller's info block.
 	 */
 
-	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
-		return -EFAULT;
-
-	pos = ifc.ifc_buf;
-	len = ifc.ifc_len;
+	pos = ifc->ifc_buf;
+	len = ifc->ifc_len;
 
 	/*
 	 *	Loop over the interfaces, and write an info block for each.
@@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg)
 		if (gifconf_list[i]) {
 			int done;
 			if (!pos)
-				done = gifconf_list[i](dev, NULL, 0);
+				done = gifconf_list[i](dev, NULL, 0, size);
 			else
 				done = gifconf_list[i](dev, pos + total,
-						       len - total);
+						       len - total, size);
 			if (done < 0)
 				return -EFAULT;
 			total += done;
@@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg)
 	/*
 	 *	All done.  Write the updated control block back to the caller.
 	 */
-	ifc.ifc_len = total;
+	ifc->ifc_len = total;
 
 	/*
 	 * 	Both BSD and Solaris return 0 here, so we do too.
 	 */
-	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+	return 0;
 }
 
 /*
@@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load);
  *	positive or a negative errno code on error.
  */
 
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
 {
-	struct ifreq ifr;
 	int ret;
 	char *colon;
 
-	/* One special case: SIOCGIFCONF takes ifconf argument
-	   and requires shared lock, because it sleeps writing
-	   to user space.
-	 */
-
-	if (cmd == SIOCGIFCONF) {
-		rtnl_lock();
-		ret = dev_ifconf(net, (char __user *) arg);
-		rtnl_unlock();
-		return ret;
-	}
+	if (need_copyout)
+		*need_copyout = true;
 	if (cmd == SIOCGIFNAME)
-		return dev_ifname(net, (struct ifreq __user *)arg);
-
-	/*
-	 *	Take care of Wireless Extensions. Unfortunately struct iwreq
-	 *	isn't a proper subset of struct ifreq (it's 8 byte shorter)
-	 *	so we need to treat it specially, otherwise applications may
-	 *	fault if the struct they're passing happens to land at the
-	 *	end of a mapped page.
-	 */
-	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
-		struct iwreq iwr;
-
-		if (copy_from_user(&iwr, arg, sizeof(iwr)))
-			return -EFAULT;
-
-		iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+		return dev_ifname(net, ifr);
 
-		return wext_handle_ioctl(net, &iwr, cmd, arg);
-	}
-
-	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
-		return -EFAULT;
-
-	ifr.ifr_name[IFNAMSIZ-1] = 0;
+	ifr->ifr_name[IFNAMSIZ-1] = 0;
 
-	colon = strchr(ifr.ifr_name, ':');
+	colon = strchr(ifr->ifr_name, ':');
 	if (colon)
 		*colon = 0;
 
+	dev_load(net, ifr->ifr_name);
+
 	/*
 	 *	See which interface the caller is talking about.
 	 */
@@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCGIFMAP:
 	case SIOCGIFINDEX:
 	case SIOCGIFTXQLEN:
-		dev_load(net, ifr.ifr_name);
 		rcu_read_lock();
-		ret = dev_ifsioc_locked(net, &ifr, cmd);
+		ret = dev_ifsioc_locked(net, ifr, cmd);
 		rcu_read_unlock();
-		if (!ret) {
-			if (colon)
-				*colon = ':';
-			if (copy_to_user(arg, &ifr,
-					 sizeof(struct ifreq)))
-				ret = -EFAULT;
-		}
+		if (colon)
+			*colon = ':';
 		return ret;
 
 	case SIOCETHTOOL:
-		dev_load(net, ifr.ifr_name);
 		rtnl_lock();
-		ret = dev_ethtool(net, &ifr);
+		ret = dev_ethtool(net, ifr);
 		rtnl_unlock();
-		if (!ret) {
-			if (colon)
-				*colon = ':';
-			if (copy_to_user(arg, &ifr,
-					 sizeof(struct ifreq)))
-				ret = -EFAULT;
-		}
+		if (colon)
+			*colon = ':';
 		return ret;
 
 	/*
@@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCSIFNAME:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
-		dev_load(net, ifr.ifr_name);
 		rtnl_lock();
-		ret = dev_ifsioc(net, &ifr, cmd);
+		ret = dev_ifsioc(net, ifr, cmd);
 		rtnl_unlock();
-		if (!ret) {
-			if (colon)
-				*colon = ':';
-			if (copy_to_user(arg, &ifr,
-					 sizeof(struct ifreq)))
-				ret = -EFAULT;
-		}
+		if (colon)
+			*colon = ':';
 		return ret;
 
 	/*
@@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
 	case SIOCBONDINFOQUERY:
-		dev_load(net, ifr.ifr_name);
 		rtnl_lock();
-		ret = dev_ifsioc(net, &ifr, cmd);
+		ret = dev_ifsioc(net, ifr, cmd);
 		rtnl_unlock();
+		if (need_copyout)
+			*need_copyout = false;
 		return ret;
 
 	case SIOCGIFMEM:
@@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		    cmd == SIOCGHWTSTAMP ||
 		    (cmd >= SIOCDEVPRIVATE &&
 		     cmd <= SIOCDEVPRIVATE + 15)) {
-			dev_load(net, ifr.ifr_name);
 			rtnl_lock();
-			ret = dev_ifsioc(net, &ifr, cmd);
+			ret = dev_ifsioc(net, ifr, cmd);
 			rtnl_unlock();
-			if (!ret && copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-				ret = -EFAULT;
 			return ret;
 		}
 		return -ENOTTY;
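After this refactor dev_ioctl() works purely on a kernel-space ifreq; the single copy_from_user/copy_to_user pair moves to the socket-layer caller, with need_copyout telling it whether the result must go back. A sketch of that caller-side pattern (the function name here is illustrative; the real caller in this series lives in net/socket.c):

	static int sock_dev_ioctl(struct net *net, unsigned int cmd,
				  void __user *arg)
	{
		struct ifreq ifr;
		bool need_copyout;
		int err;

		if (copy_from_user(&ifr, arg, sizeof(ifr)))
			return -EFAULT;
		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
		if (!err && need_copyout &&
		    copy_to_user(arg, &ifr, sizeof(ifr)))
			return -EFAULT;
		return err;
	}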
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7d430c1d9c3e..18d385ed8237 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -92,12 +92,6 @@ static LIST_HEAD(devlink_list);
  */
 static DEFINE_MUTEX(devlink_mutex);
 
-/* devlink_port_mutex
- *
- * Shared lock to guard lists of ports in all devlink devices.
- */
-static DEFINE_MUTEX(devlink_port_mutex);
-
 static struct net *devlink_net(const struct devlink *devlink)
 {
 	return read_pnet(&devlink->_net);
@@ -335,15 +329,18 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
-#define DEVLINK_NL_FLAG_LOCK_PORTS	BIT(3)
-	/* port is not needed but we need to ensure they don't
-	 * change in the middle of command
-	 */
+
+/* The per devlink instance lock is taken by default in the pre-doit
+ * operation, yet several commands do not require this. The global
+ * devlink lock is taken and protects from disruption by user-calls.
+ */
+#define DEVLINK_NL_FLAG_NO_LOCK		BIT(3)
 
 static int devlink_nl_pre_doit(const struct genl_ops *ops,
 			       struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink;
+	int err;
 
 	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_info(info);
@@ -351,44 +348,47 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 		mutex_unlock(&devlink_mutex);
 		return PTR_ERR(devlink);
 	}
+	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+		mutex_lock(&devlink->lock);
 	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
 		info->user_ptr[0] = devlink;
 	} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
 		struct devlink_port *devlink_port;
 
-		mutex_lock(&devlink_port_mutex);
 		devlink_port = devlink_port_get_from_info(devlink, info);
 		if (IS_ERR(devlink_port)) {
-			mutex_unlock(&devlink_port_mutex);
-			mutex_unlock(&devlink_mutex);
-			return PTR_ERR(devlink_port);
+			err = PTR_ERR(devlink_port);
+			goto unlock;
 		}
 		info->user_ptr[0] = devlink_port;
 	}
-	if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) {
-		mutex_lock(&devlink_port_mutex);
-	}
 	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
 		struct devlink_sb *devlink_sb;
 
 		devlink_sb = devlink_sb_get_from_info(devlink, info);
 		if (IS_ERR(devlink_sb)) {
-			if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
-				mutex_unlock(&devlink_port_mutex);
-			mutex_unlock(&devlink_mutex);
-			return PTR_ERR(devlink_sb);
+			err = PTR_ERR(devlink_sb);
+			goto unlock;
 		}
 		info->user_ptr[1] = devlink_sb;
 	}
 	return 0;
+
+unlock:
+	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+		mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+	return err;
 }
 
 static void devlink_nl_post_doit(const struct genl_ops *ops,
 				 struct sk_buff *skb, struct genl_info *info)
 {
-	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT ||
-	    ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS)
-		mutex_unlock(&devlink_port_mutex);
+	struct devlink *devlink;
+
+	devlink = devlink_get_from_info(info);
+	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+		mutex_unlock(&devlink->lock);
 	mutex_unlock(&devlink_mutex);
 }
 
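The global devlink_port_mutex gives way to a per-instance devlink->lock, taken by default around every doit between pre-doit and post-doit. The NO_LOCK escape hatch exists because some commands re-enter the registration helpers that take the same lock; a hedged sketch of why reload (wired up later in this patch) needs it, with a hypothetical foo driver:

	/* would deadlock if pre-doit already held devlink->lock */
	static int foo_devlink_reload(struct devlink *devlink)
	{
		foo_fini(devlink);
		return foo_init(devlink);	/* re-runs devlink_port_register(),
						 * devlink_sb_register(), ... each
						 * of which takes devlink->lock */
	}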
@@ -614,10 +614,10 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
-	mutex_lock(&devlink_port_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			continue;
+		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_port, &devlink->port_list, list) {
 			if (idx < start) {
 				idx++;
@@ -628,13 +628,15 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 						   NETLINK_CB(cb->skb).portid,
 						   cb->nlh->nlmsg_seq,
 						   NLM_F_MULTI);
-			if (err)
+			if (err) {
+				mutex_unlock(&devlink->lock);
 				goto out;
+			}
 			idx++;
 		}
+		mutex_unlock(&devlink->lock);
 	}
 out:
-	mutex_unlock(&devlink_port_mutex);
 	mutex_unlock(&devlink_mutex);
 
 	cb->args[0] = idx;
@@ -801,6 +803,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			continue;
+		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			if (idx < start) {
 				idx++;
@@ -811,10 +814,13 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 					       NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq,
 					       NLM_F_MULTI);
-			if (err)
+			if (err) {
+				mutex_unlock(&devlink->lock);
 				goto out;
+			}
 			idx++;
 		}
+		mutex_unlock(&devlink->lock);
 	}
 out:
 	mutex_unlock(&devlink_mutex);
@@ -935,14 +941,18 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
 		    !devlink->ops || !devlink->ops->sb_pool_get)
 			continue;
+		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
 						   devlink_sb,
 						   NETLINK_CB(cb->skb).portid,
 						   cb->nlh->nlmsg_seq);
-			if (err && err != -EOPNOTSUPP)
+			if (err && err != -EOPNOTSUPP) {
+				mutex_unlock(&devlink->lock);
 				goto out;
+			}
 		}
+		mutex_unlock(&devlink->lock);
 	}
 out:
 	mutex_unlock(&devlink_mutex);
@@ -1123,22 +1133,24 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
-	mutex_lock(&devlink_port_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
 		    !devlink->ops || !devlink->ops->sb_port_pool_get)
 			continue;
+		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			err = __sb_port_pool_get_dumpit(msg, start, &idx,
 							devlink, devlink_sb,
 							NETLINK_CB(cb->skb).portid,
 							cb->nlh->nlmsg_seq);
-			if (err && err != -EOPNOTSUPP)
+			if (err && err != -EOPNOTSUPP) {
+				mutex_unlock(&devlink->lock);
 				goto out;
+			}
 		}
+		mutex_unlock(&devlink->lock);
 	}
 out:
-	mutex_unlock(&devlink_port_mutex);
 	mutex_unlock(&devlink_mutex);
 
 	cb->args[0] = idx;
@@ -1347,23 +1359,26 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
-	mutex_lock(&devlink_port_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
 		    !devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
 			continue;
+
+		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
 							   devlink,
 							   devlink_sb,
 							   NETLINK_CB(cb->skb).portid,
 							   cb->nlh->nlmsg_seq);
-			if (err && err != -EOPNOTSUPP)
+			if (err && err != -EOPNOTSUPP) {
+				mutex_unlock(&devlink->lock);
 				goto out;
+			}
 		}
+		mutex_unlock(&devlink->lock);
 	}
 out:
-	mutex_unlock(&devlink_port_mutex);
 	mutex_unlock(&devlink_mutex);
 
 	cb->args[0] = idx;
@@ -1679,6 +1694,12 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
 			       table->counters_enabled))
 		goto nla_put_failure;
 
+	if (table->resource_valid) {
+		nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+				  table->resource_id, DEVLINK_ATTR_PAD);
+		nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+				  table->resource_units, DEVLINK_ATTR_PAD);
+	}
 	if (devlink_dpipe_matches_put(table, skb))
 		goto nla_put_failure;
 
@@ -2273,6 +2294,273 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
 					   counters_enable);
 }
 
+static struct devlink_resource *
+devlink_resource_find(struct devlink *devlink,
+		      struct devlink_resource *resource, u64 resource_id)
+{
+	struct list_head *resource_list;
+
+	if (resource)
+		resource_list = &resource->resource_list;
+	else
+		resource_list = &devlink->resource_list;
+
+	list_for_each_entry(resource, resource_list, list) {
+		struct devlink_resource *child_resource;
+
+		if (resource->id == resource_id)
+			return resource;
+
+		child_resource = devlink_resource_find(devlink, resource,
+						       resource_id);
+		if (child_resource)
+			return child_resource;
+	}
+	return NULL;
+}
+
+static void
+devlink_resource_validate_children(struct devlink_resource *resource)
+{
+	struct devlink_resource *child_resource;
+	bool size_valid = true;
+	u64 parts_size = 0;
+
+	if (list_empty(&resource->resource_list))
+		goto out;
+
+	list_for_each_entry(child_resource, &resource->resource_list, list)
+		parts_size += child_resource->size_new;
+
+	if (parts_size > resource->size)
+		size_valid = false;
+out:
+	resource->size_valid = size_valid;
+}
+
+static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_resource *resource;
+	u64 resource_id;
+	u64 size;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_RESOURCE_ID] ||
+	    !info->attrs[DEVLINK_ATTR_RESOURCE_SIZE])
+		return -EINVAL;
+	resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
+
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (!resource)
+		return -EINVAL;
+
+	if (!resource->resource_ops->size_validate)
+		return -EINVAL;
+
+	size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
+	err = resource->resource_ops->size_validate(devlink, size,
+						    info->extack);
+	if (err)
+		return err;
+
+	resource->size_new = size;
+	devlink_resource_validate_children(resource);
+	if (resource->parent)
+		devlink_resource_validate_children(resource->parent);
+	return 0;
+}
+
+static void
+devlink_resource_size_params_put(struct devlink_resource *resource,
+				 struct sk_buff *skb)
+{
+	struct devlink_resource_size_params *size_params;
+
+	size_params = resource->size_params;
+	nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+			  size_params->size_granularity, DEVLINK_ATTR_PAD);
+	nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+			  size_params->size_max, DEVLINK_ATTR_PAD);
+	nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+			  size_params->size_min, DEVLINK_ATTR_PAD);
+	nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit);
+}
+
+static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
+				struct devlink_resource *resource)
+{
+	struct devlink_resource *child_resource;
+	struct nlattr *child_resource_attr;
+	struct nlattr *resource_attr;
+
+	resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE);
+	if (!resource_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
+			      DEVLINK_ATTR_PAD) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+	if (resource->size != resource->size_new)
+		nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+				  resource->size_new, DEVLINK_ATTR_PAD);
+	if (resource->resource_ops && resource->resource_ops->occ_get)
+		nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+				  resource->resource_ops->occ_get(devlink),
+				  DEVLINK_ATTR_PAD);
+	devlink_resource_size_params_put(resource, skb);
+	if (list_empty(&resource->resource_list))
+		goto out;
+
+	if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
+		       resource->size_valid))
+		goto nla_put_failure;
+
+	child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+	if (!child_resource_attr)
+		goto nla_put_failure;
+
+	list_for_each_entry(child_resource, &resource->resource_list, list) {
+		if (devlink_resource_put(devlink, skb, child_resource))
+			goto resource_put_failure;
+	}
+
+	nla_nest_end(skb, child_resource_attr);
+out:
+	nla_nest_end(skb, resource_attr);
+	return 0;
+
+resource_put_failure:
+	nla_nest_cancel(skb, child_resource_attr);
+nla_put_failure:
+	nla_nest_cancel(skb, resource_attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_resource_fill(struct genl_info *info,
+				 enum devlink_command cmd, int flags)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_resource *resource;
+	struct nlattr *resources_attr;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	bool incomplete;
+	void *hdr;
+	int i;
+	int err;
+
+	resource = list_first_entry(&devlink->resource_list,
+				    struct devlink_resource, list);
+start_again:
+	err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+			  &devlink_nl_family, NLM_F_MULTI, cmd);
+	if (!hdr) {
+		nlmsg_free(skb);
+		return -EMSGSIZE;
+	}
+
+	if (devlink_nl_put_handle(skb, devlink))
+		goto nla_put_failure;
+
+	resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+	if (!resources_attr)
+		goto nla_put_failure;
+
+	incomplete = false;
+	i = 0;
+	list_for_each_entry_from(resource, &devlink->resource_list, list) {
+		err = devlink_resource_put(devlink, skb, resource);
+		if (err) {
+			if (!i)
+				goto err_resource_put;
+			incomplete = true;
+			break;
+		}
+		i++;
+	}
+	nla_nest_end(skb, resources_attr);
+	genlmsg_end(skb, hdr);
+	if (incomplete)
+		goto start_again;
+send_done:
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+		if (err)
+			goto err_skb_send_alloc;
+		goto send_done;
+	}
+	return genlmsg_reply(skb, info);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+err_resource_put:
+err_skb_send_alloc:
+	genlmsg_cancel(skb, hdr);
+	nlmsg_free(skb);
+	return err;
+}
+
+static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+
+	if (list_empty(&devlink->resource_list))
+		return -EOPNOTSUPP;
+
+	return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
+}
+
+static int
+devlink_resources_validate(struct devlink *devlink,
+			   struct devlink_resource *resource,
+			   struct genl_info *info)
+{
+	struct list_head *resource_list;
+	int err = 0;
+
+	if (resource)
+		resource_list = &resource->resource_list;
+	else
+		resource_list = &devlink->resource_list;
+
+	list_for_each_entry(resource, resource_list, list) {
+		if (!resource->size_valid)
+			return -EINVAL;
+		err = devlink_resources_validate(devlink, resource, info);
+		if (err)
+			return err;
+	}
+	return err;
+}
+
+static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	int err;
+
+	if (!devlink->ops->reload)
+		return -EOPNOTSUPP;
+
+	err = devlink_resources_validate(devlink, NULL, info);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+		return err;
+	}
+	return devlink->ops->reload(devlink);
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -2291,6 +2579,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
+	[DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -2322,14 +2612,16 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.doit = devlink_nl_cmd_port_split_doit,
 		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_PORT_UNSPLIT,
 		.doit = devlink_nl_cmd_port_unsplit_doit,
 		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_GET,
@@ -2397,8 +2689,7 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-				  DEVLINK_NL_FLAG_NEED_SB |
-				  DEVLINK_NL_FLAG_LOCK_PORTS,
+				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
@@ -2406,8 +2697,7 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-				  DEVLINK_NL_FLAG_NEED_SB |
-				  DEVLINK_NL_FLAG_LOCK_PORTS,
+				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_ESWITCH_GET,
@@ -2451,6 +2741,28 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_RESOURCE_SET,
+		.doit = devlink_nl_cmd_resource_set,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_RESOURCE_DUMP,
+		.doit = devlink_nl_cmd_resource_dump,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_RELOAD,
+		.doit = devlink_nl_cmd_reload,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
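The DEVLINK_CMD_RESOURCE_SET handler above delegates all range and alignment policy to the driver through resource_ops->size_validate, reporting failures via extack. A hedged sketch of such a callback (the foo names and granularity constant are made up for illustration):

	static int foo_kvd_size_validate(struct devlink *devlink, u64 size,
					 struct netlink_ext_ack *extack)
	{
		if (size % FOO_KVD_GRANULARITY) {	/* hypothetical constant */
			NL_SET_ERR_MSG_MOD(extack,
					   "size is not a multiple of the granularity");
			return -EINVAL;
		}
		return 0;
	}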
@@ -2488,6 +2800,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->port_list);
 	INIT_LIST_HEAD(&devlink->sb_list);
 	INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
+	INIT_LIST_HEAD(&devlink->resource_list);
+	mutex_init(&devlink->lock);
 	return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -2550,16 +2864,16 @@ int devlink_port_register(struct devlink *devlink,
 			  struct devlink_port *devlink_port,
 			  unsigned int port_index)
 {
-	mutex_lock(&devlink_port_mutex);
+	mutex_lock(&devlink->lock);
 	if (devlink_port_index_exists(devlink, port_index)) {
-		mutex_unlock(&devlink_port_mutex);
+		mutex_unlock(&devlink->lock);
 		return -EEXIST;
 	}
 	devlink_port->devlink = devlink;
 	devlink_port->index = port_index;
 	devlink_port->registered = true;
 	list_add_tail(&devlink_port->list, &devlink->port_list);
-	mutex_unlock(&devlink_port_mutex);
+	mutex_unlock(&devlink->lock);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 	return 0;
 }
@@ -2572,10 +2886,12 @@ EXPORT_SYMBOL_GPL(devlink_port_register);
  */
 void devlink_port_unregister(struct devlink_port *devlink_port)
 {
+	struct devlink *devlink = devlink_port->devlink;
+
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
-	mutex_lock(&devlink_port_mutex);
+	mutex_lock(&devlink->lock);
 	list_del(&devlink_port->list);
-	mutex_unlock(&devlink_port_mutex);
+	mutex_unlock(&devlink->lock);
 }
 EXPORT_SYMBOL_GPL(devlink_port_unregister);
 
@@ -2651,7 +2967,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 	struct devlink_sb *devlink_sb;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
 	if (devlink_sb_index_exists(devlink, sb_index)) {
 		err = -EEXIST;
 		goto unlock;
@@ -2670,7 +2986,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 	devlink_sb->egress_tc_count = egress_tc_count;
 	list_add_tail(&devlink_sb->list, &devlink->sb_list);
 unlock:
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 	return err;
 }
 EXPORT_SYMBOL_GPL(devlink_sb_register);
@@ -2679,11 +2995,11 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
 {
 	struct devlink_sb *devlink_sb;
 
-	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
 	devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
 	WARN_ON(!devlink_sb);
 	list_del(&devlink_sb->list);
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 	kfree(devlink_sb);
 }
 EXPORT_SYMBOL_GPL(devlink_sb_unregister);
@@ -2699,9 +3015,9 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
 int devlink_dpipe_headers_register(struct devlink *devlink,
 				   struct devlink_dpipe_headers *dpipe_headers)
 {
-	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
 	devlink->dpipe_headers = dpipe_headers;
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
@@ -2715,9 +3031,9 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
  */
 void devlink_dpipe_headers_unregister(struct devlink *devlink)
 {
-	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
 	devlink->dpipe_headers = NULL;
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
 
@@ -2783,9 +3099,9 @@ int devlink_dpipe_table_register(struct devlink *devlink,
 	table->priv = priv;
 	table->counter_control_extern = counter_control_extern;
 
-	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
 	list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -2801,20 +3117,182 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
 {
 	struct devlink_dpipe_table *table;
 
-	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
 	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
 					 table_name);
 	if (!table)
 		goto unlock;
 	list_del_rcu(&table->list);
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 	kfree_rcu(table, rcu);
 	return;
 unlock:
-	mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink->lock);
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
 
+/**
+ *	devlink_resource_register - devlink resource register
+ *
+ *	@devlink: devlink
+ *	@resource_name: resource's name
+ *	@top_hierarchy: top hierarchy
+ *	@reload_required: reload is required for new configuration to
+ *			  apply
+ *	@resource_size: resource's size
+ *	@resource_id: resource's id
+ *	@parent_resource_id: resource's parent id
+ *	@size_params: size parameters
+ *	@resource_ops: resource ops
+ */
+int devlink_resource_register(struct devlink *devlink,
+			      const char *resource_name,
+			      bool top_hierarchy,
+			      u64 resource_size,
+			      u64 resource_id,
+			      u64 parent_resource_id,
+			      struct devlink_resource_size_params *size_params,
+			      const struct devlink_resource_ops *resource_ops)
+{
+	struct devlink_resource *resource;
+	struct list_head *resource_list;
+	int err = 0;
+
+	mutex_lock(&devlink->lock);
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (resource) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+	if (!resource) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (top_hierarchy) {
+		resource_list = &devlink->resource_list;
+	} else {
+		struct devlink_resource *parent_resource;
+
+		parent_resource = devlink_resource_find(devlink, NULL,
+							parent_resource_id);
+		if (parent_resource) {
+			resource_list = &parent_resource->resource_list;
+			resource->parent = parent_resource;
+		} else {
+			kfree(resource);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	resource->name = resource_name;
+	resource->size = resource_size;
+	resource->size_new = resource_size;
+	resource->id = resource_id;
+	resource->resource_ops = resource_ops;
+	resource->size_valid = true;
+	resource->size_params = size_params;
+	INIT_LIST_HEAD(&resource->resource_list);
+	list_add_tail(&resource->list, resource_list);
+out:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_register);
+
+/**
+ *	devlink_resources_unregister - free all resources
+ *
+ *	@devlink: devlink
+ *	@resource: resource
+ */
+void devlink_resources_unregister(struct devlink *devlink,
+				  struct devlink_resource *resource)
+{
+	struct devlink_resource *tmp, *child_resource;
+	struct list_head *resource_list;
+
+	if (resource)
+		resource_list = &resource->resource_list;
+	else
+		resource_list = &devlink->resource_list;
+
+	if (!resource)
+		mutex_lock(&devlink->lock);
+
+	list_for_each_entry_safe(child_resource, tmp, resource_list, list) {
+		devlink_resources_unregister(devlink, child_resource);
+		list_del(&child_resource->list);
+		kfree(child_resource);
+	}
+
+	if (!resource)
+		mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resources_unregister);
+
+/**
+ *	devlink_resource_size_get - get and update size
+ *
+ *	@devlink: devlink
+ *	@resource_id: the requested resource id
+ *	@p_resource_size: ptr to update
+ */
+int devlink_resource_size_get(struct devlink *devlink,
+			      u64 resource_id,
+			      u64 *p_resource_size)
+{
+	struct devlink_resource *resource;
+	int err = 0;
+
+	mutex_lock(&devlink->lock);
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (!resource) {
+		err = -EINVAL;
+		goto out;
+	}
+	*p_resource_size = resource->size_new;
+	resource->size = resource->size_new;
+out:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_size_get);
+
+/**
+ *	devlink_dpipe_table_resource_set - set the resource id
+ *
+ *	@devlink: devlink
+ *	@table_name: table name
+ *	@resource_id: resource id
+ *	@resource_units: number of resource's units consumed per table's entry
+ */
+int devlink_dpipe_table_resource_set(struct devlink *devlink,
+				     const char *table_name, u64 resource_id,
+				     u64 resource_units)
+{
+	struct devlink_dpipe_table *table;
+	int err = 0;
+
+	mutex_lock(&devlink->lock);
+	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+					 table_name);
+	if (!table) {
+		err = -EINVAL;
+		goto out;
+	}
+	table->resource_id = resource_id;
+	table->resource_units = resource_units;
+	table->resource_valid = true;
+out:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
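From the driver's point of view, the new API is a registration tree: top-level resources hang off the devlink instance and children nest under a parent id. A hedged sketch of a two-level registration (all foo ids, sizes and the unit constant are illustrative; mlxsw is the first real user of this interface):

	static int foo_resources_register(struct devlink *devlink)
	{
		struct devlink_resource_size_params params = {
			.size_min = 0,
			.size_max = 1 << 20,
			.size_granularity = 128,
			.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
		};
		int err;

		/* top_hierarchy = true, so the parent id is ignored */
		err = devlink_resource_register(devlink, "kvd", true, 1 << 20,
						FOO_RES_KVD, 0, &params,
						&foo_kvd_ops);
		if (err)
			return err;
		return devlink_resource_register(devlink, "kvd_linear", false,
						 1 << 19, FOO_RES_KVD_LINEAR,
						 FOO_RES_KVD, &params,
						 &foo_kvd_ops);
	}

After a DEVLINK_CMD_RESOURCE_SET the new size only lands in size_new; the driver picks it up via devlink_resource_size_get() during its ->reload(), which is why reload validates the whole tree first.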
diff --git a/net/core/dst.c b/net/core/dst.c
index 662a2d4a3d19..007aa0b08291 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/prefetch.h>
 #include <net/lwtunnel.h>
+#include <net/xfrm.h>
 
 #include <net/dst.h>
 #include <net/dst_metadata.h>
@@ -62,15 +63,12 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	      struct net_device *dev, int initial_ref, int initial_obsolete,
 	      unsigned short flags)
 {
-	dst->child = NULL;
 	dst->dev = dev;
 	if (dev)
 		dev_hold(dev);
 	dst->ops = ops;
 	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 	dst->expires = 0UL;
-	dst->path = dst;
-	dst->from = NULL;
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -88,7 +86,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	dst->__use = 0;
 	dst->lastuse = jiffies;
 	dst->flags = flags;
-	dst->next = NULL;
 	if (!(flags & DST_NOCOUNT))
 		dst_entries_add(ops, 1);
 }
@@ -116,12 +113,17 @@ EXPORT_SYMBOL(dst_alloc);
 
 struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
-	struct dst_entry *child;
+	struct dst_entry *child = NULL;
 
 	smp_rmb();
 
-	child = dst->child;
+#ifdef CONFIG_XFRM
+	if (dst->xfrm) {
+		struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
 
+		child = xdst->child;
+	}
+#endif
 	if (!(dst->flags & DST_NOCOUNT))
 		dst_entries_add(dst->ops, -1);
 
```
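With child, path, from and next gone from the generic struct dst_entry, only xfrm bundle dsts keep a child, and it lives in the enclosing xfrm_dst; dst_destroy() above recovers it by casting. The same test could be expressed as a small helper, shown here only to illustrate the new layout:

	/* illustrative equivalent of the #ifdef block above */
	static struct dst_entry *dst_child_or_null(struct dst_entry *dst)
	{
	#ifdef CONFIG_XFRM
		if (dst->xfrm)
			return ((struct xfrm_dst *)dst)->child;
	#endif
		return NULL;
	}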
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8225416911ae..494e6a5d7306 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -73,6 +73,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
73 [NETIF_F_LLTX_BIT] = "tx-lockless", 73 [NETIF_F_LLTX_BIT] = "tx-lockless",
74 [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", 74 [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
75 [NETIF_F_GRO_BIT] = "rx-gro", 75 [NETIF_F_GRO_BIT] = "rx-gro",
76 [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
76 [NETIF_F_LRO_BIT] = "rx-lro", 77 [NETIF_F_LRO_BIT] = "rx-lro",
77 78
78 [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", 79 [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
@@ -615,18 +616,15 @@ static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to,
615 return -EFAULT; 616 return -EFAULT;
616 617
617 memcpy(&to->base, &link_usettings.base, sizeof(to->base)); 618 memcpy(&to->base, &link_usettings.base, sizeof(to->base));
618 bitmap_from_u32array(to->link_modes.supported, 619 bitmap_from_arr32(to->link_modes.supported,
619 __ETHTOOL_LINK_MODE_MASK_NBITS, 620 link_usettings.link_modes.supported,
620 link_usettings.link_modes.supported, 621 __ETHTOOL_LINK_MODE_MASK_NBITS);
621 __ETHTOOL_LINK_MODE_MASK_NU32); 622 bitmap_from_arr32(to->link_modes.advertising,
622 bitmap_from_u32array(to->link_modes.advertising, 623 link_usettings.link_modes.advertising,
623 __ETHTOOL_LINK_MODE_MASK_NBITS, 624 __ETHTOOL_LINK_MODE_MASK_NBITS);
624 link_usettings.link_modes.advertising, 625 bitmap_from_arr32(to->link_modes.lp_advertising,
625 __ETHTOOL_LINK_MODE_MASK_NU32); 626 link_usettings.link_modes.lp_advertising,
626 bitmap_from_u32array(to->link_modes.lp_advertising, 627 __ETHTOOL_LINK_MODE_MASK_NBITS);
627 __ETHTOOL_LINK_MODE_MASK_NBITS,
628 link_usettings.link_modes.lp_advertising,
629 __ETHTOOL_LINK_MODE_MASK_NU32);
630 628
631 return 0; 629 return 0;
632} 630}
@@ -642,18 +640,15 @@ store_link_ksettings_for_user(void __user *to,
642 struct ethtool_link_usettings link_usettings; 640 struct ethtool_link_usettings link_usettings;
643 641
644 memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); 642 memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
645 bitmap_to_u32array(link_usettings.link_modes.supported, 643 bitmap_to_arr32(link_usettings.link_modes.supported,
646 __ETHTOOL_LINK_MODE_MASK_NU32, 644 from->link_modes.supported,
647 from->link_modes.supported, 645 __ETHTOOL_LINK_MODE_MASK_NBITS);
648 __ETHTOOL_LINK_MODE_MASK_NBITS); 646 bitmap_to_arr32(link_usettings.link_modes.advertising,
649 bitmap_to_u32array(link_usettings.link_modes.advertising, 647 from->link_modes.advertising,
650 __ETHTOOL_LINK_MODE_MASK_NU32, 648 __ETHTOOL_LINK_MODE_MASK_NBITS);
651 from->link_modes.advertising, 649 bitmap_to_arr32(link_usettings.link_modes.lp_advertising,
652 __ETHTOOL_LINK_MODE_MASK_NBITS); 650 from->link_modes.lp_advertising,
653 bitmap_to_u32array(link_usettings.link_modes.lp_advertising, 651 __ETHTOOL_LINK_MODE_MASK_NBITS);
654 __ETHTOOL_LINK_MODE_MASK_NU32,
655 from->link_modes.lp_advertising,
656 __ETHTOOL_LINK_MODE_MASK_NBITS);
657 652
658 if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) 653 if (copy_to_user(to, &link_usettings, sizeof(link_usettings)))
659 return -EFAULT; 654 return -EFAULT;
@@ -1692,14 +1687,23 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
1692 1687
1693static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) 1688static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
1694{ 1689{
1695 struct ethtool_ringparam ringparam; 1690 struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM };
1696 1691
1697 if (!dev->ethtool_ops->set_ringparam) 1692 if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam)
1698 return -EOPNOTSUPP; 1693 return -EOPNOTSUPP;
1699 1694
1700 if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) 1695 if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
1701 return -EFAULT; 1696 return -EFAULT;
1702 1697
1698 dev->ethtool_ops->get_ringparam(dev, &max);
1699
1700 /* ensure new ring parameters are within the maximums */
1701 if (ringparam.rx_pending > max.rx_max_pending ||
1702 ringparam.rx_mini_pending > max.rx_mini_max_pending ||
1703 ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending ||
1704 ringparam.tx_pending > max.tx_max_pending)
1705 return -EINVAL;
1706
1703 return dev->ethtool_ops->set_ringparam(dev, &ringparam); 1707 return dev->ethtool_ops->set_ringparam(dev, &ringparam);
1704} 1708}
1705 1709
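
With this check in the core, ETHTOOL_SRINGPARAM requests that exceed the driver-reported maximums now fail uniformly with -EINVAL, and drivers need only report honest limits from get_ringparam(). A hedged driver-side sketch (the mydrv_* name and the 4096/512 values are hypothetical):

static void mydrv_get_ringparam(struct net_device *dev,
				struct ethtool_ringparam *ring)
{
	/* maximums the core validates ETHTOOL_SRINGPARAM against */
	ring->rx_max_pending = 4096;
	ring->tx_max_pending = 4096;
	/* currently configured ring sizes */
	ring->rx_pending = 512;
	ring->tx_pending = 512;
}
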
@@ -2348,10 +2352,8 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev,
2348 2352
2349 useraddr += sizeof(*per_queue_opt); 2353 useraddr += sizeof(*per_queue_opt);
2350 2354
2351 bitmap_from_u32array(queue_mask, 2355 bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask,
2352 MAX_NUM_QUEUE, 2356 MAX_NUM_QUEUE);
2353 per_queue_opt->queue_mask,
2354 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2355 2357
2356 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { 2358 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2357 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; 2359 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
@@ -2383,10 +2385,7 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev,
2383 2385
2384 useraddr += sizeof(*per_queue_opt); 2386 useraddr += sizeof(*per_queue_opt);
2385 2387
2386 bitmap_from_u32array(queue_mask, 2388 bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, MAX_NUM_QUEUE);
2387 MAX_NUM_QUEUE,
2388 per_queue_opt->queue_mask,
2389 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2390 n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); 2389 n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE);
2391 tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); 2390 tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL);
2392 if (!backup) 2391 if (!backup)
diff --git a/net/core/filter.c b/net/core/filter.c
index 1c0eb436671f..08ab4c65a998 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -401,8 +401,8 @@ do_pass:
401 /* Classic BPF expects A and X to be reset first. These need 401 /* Classic BPF expects A and X to be reset first. These need
402 * to be guaranteed to be the first two instructions. 402 * to be guaranteed to be the first two instructions.
403 */ 403 */
404 *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); 404 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
405 *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); 405 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
406 406
407 /* All programs must keep CTX in callee saved BPF_REG_CTX. 407 /* All programs must keep CTX in callee saved BPF_REG_CTX.
408 * In eBPF case it's done by the compiler, here we need to 408 * In eBPF case it's done by the compiler, here we need to
@@ -459,8 +459,15 @@ do_pass:
459 break; 459 break;
460 460
461 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || 461 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
462 fp->code == (BPF_ALU | BPF_MOD | BPF_X)) 462 fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
463 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); 463 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
464 /* Error with exception code on div/mod by 0.
 465 * For cBPF programs, this always meant returning 0.
466 */
467 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
468 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
469 *insn++ = BPF_EXIT_INSN();
470 }
464 471
465 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); 472 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
466 break; 473 break;
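
The added instructions implement the classic-BPF contract that division or modulo by a zero X register ends the filter with a return value of 0, instead of leaving the behaviour to the interpreter or JIT. Annotated, the emitted sequence is:

	*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);		/* truncate X to 32 bits */
	*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);	/* X != 0: skip the next two */
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);	/* A = 0 */
	*insn++ = BPF_EXIT_INSN();				/* return A, i.e. 0 */
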
@@ -2686,8 +2693,9 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
2686 return 0; 2693 return 0;
2687} 2694}
2688 2695
2689int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, 2696static int xdp_do_generic_redirect_map(struct net_device *dev,
2690 struct bpf_prog *xdp_prog) 2697 struct sk_buff *skb,
2698 struct bpf_prog *xdp_prog)
2691{ 2699{
2692 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2700 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2693 unsigned long map_owner = ri->map_owner; 2701 unsigned long map_owner = ri->map_owner;
@@ -2864,7 +2872,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
2864 .arg2_type = ARG_CONST_MAP_PTR, 2872 .arg2_type = ARG_CONST_MAP_PTR,
2865 .arg3_type = ARG_ANYTHING, 2873 .arg3_type = ARG_ANYTHING,
2866 .arg4_type = ARG_PTR_TO_MEM, 2874 .arg4_type = ARG_PTR_TO_MEM,
2867 .arg5_type = ARG_CONST_SIZE, 2875 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
2868}; 2876};
2869 2877
2870static unsigned short bpf_tunnel_key_af(u64 flags) 2878static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -3015,6 +3023,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3015 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; 3023 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
3016 if (flags & BPF_F_DONT_FRAGMENT) 3024 if (flags & BPF_F_DONT_FRAGMENT)
3017 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; 3025 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
3026 if (flags & BPF_F_ZERO_CSUM_TX)
3027 info->key.tun_flags &= ~TUNNEL_CSUM;
3018 3028
3019 info->key.tun_id = cpu_to_be64(from->tunnel_id); 3029 info->key.tun_id = cpu_to_be64(from->tunnel_id);
3020 info->key.tos = from->tunnel_tos; 3030 info->key.tos = from->tunnel_tos;
@@ -3028,8 +3038,6 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3028 IPV6_FLOWLABEL_MASK; 3038 IPV6_FLOWLABEL_MASK;
3029 } else { 3039 } else {
3030 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 3040 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3031 if (flags & BPF_F_ZERO_CSUM_TX)
3032 info->key.tun_flags &= ~TUNNEL_CSUM;
3033 } 3041 }
3034 3042
3035 return 0; 3043 return 0;
@@ -3153,7 +3161,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
3153 .arg2_type = ARG_CONST_MAP_PTR, 3161 .arg2_type = ARG_CONST_MAP_PTR,
3154 .arg3_type = ARG_ANYTHING, 3162 .arg3_type = ARG_ANYTHING,
3155 .arg4_type = ARG_PTR_TO_MEM, 3163 .arg4_type = ARG_PTR_TO_MEM,
3156 .arg5_type = ARG_CONST_SIZE, 3164 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
3157}; 3165};
3158 3166
3159BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) 3167BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
@@ -3231,6 +3239,29 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3231 ret = -EINVAL; 3239 ret = -EINVAL;
3232 } 3240 }
3233#ifdef CONFIG_INET 3241#ifdef CONFIG_INET
3242#if IS_ENABLED(CONFIG_IPV6)
3243 } else if (level == SOL_IPV6) {
3244 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
3245 return -EINVAL;
3246
3247 val = *((int *)optval);
3248 /* Only some options are supported */
3249 switch (optname) {
3250 case IPV6_TCLASS:
3251 if (val < -1 || val > 0xff) {
3252 ret = -EINVAL;
3253 } else {
3254 struct ipv6_pinfo *np = inet6_sk(sk);
3255
3256 if (val == -1)
3257 val = 0;
3258 np->tclass = val;
3259 }
3260 break;
3261 default:
3262 ret = -EINVAL;
3263 }
3264#endif
3234 } else if (level == SOL_TCP && 3265 } else if (level == SOL_TCP &&
3235 sk->sk_prot->setsockopt == tcp_setsockopt) { 3266 sk->sk_prot->setsockopt == tcp_setsockopt) {
3236 if (optname == TCP_CONGESTION) { 3267 if (optname == TCP_CONGESTION) {
@@ -3240,7 +3271,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3240 strncpy(name, optval, min_t(long, optlen, 3271 strncpy(name, optval, min_t(long, optlen,
3241 TCP_CA_NAME_MAX-1)); 3272 TCP_CA_NAME_MAX-1));
3242 name[TCP_CA_NAME_MAX-1] = 0; 3273 name[TCP_CA_NAME_MAX-1] = 0;
3243 ret = tcp_set_congestion_control(sk, name, false, reinit); 3274 ret = tcp_set_congestion_control(sk, name, false,
3275 reinit);
3244 } else { 3276 } else {
3245 struct tcp_sock *tp = tcp_sk(sk); 3277 struct tcp_sock *tp = tcp_sk(sk);
3246 3278
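
The new SOL_IPV6 branch means a sock_ops program can now set the IPv6 traffic class on its socket. A minimal sketch in BPF C; SEC(), the helper declaration, and the choice of callback follow the kernel's BPF samples of this era and are illustrative rather than definitive:

SEC("sockops")
int set_tclass(struct bpf_sock_ops *skops)
{
	int tclass = 0x10;	/* arbitrary example value, 0..0xff */

	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS,
			       &tclass, sizeof(tclass));
	return 1;
}
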
@@ -3306,6 +3338,22 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3306 } else { 3338 } else {
3307 goto err_clear; 3339 goto err_clear;
3308 } 3340 }
3341#if IS_ENABLED(CONFIG_IPV6)
3342 } else if (level == SOL_IPV6) {
3343 struct ipv6_pinfo *np = inet6_sk(sk);
3344
3345 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
3346 goto err_clear;
3347
3348 /* Only some options are supported */
3349 switch (optname) {
3350 case IPV6_TCLASS:
3351 *((int *)optval) = (int)np->tclass;
3352 break;
3353 default:
3354 goto err_clear;
3355 }
3356#endif
3309 } else { 3357 } else {
3310 goto err_clear; 3358 goto err_clear;
3311 } 3359 }
@@ -3327,6 +3375,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
3327 .arg5_type = ARG_CONST_SIZE, 3375 .arg5_type = ARG_CONST_SIZE,
3328}; 3376};
3329 3377
3378BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
3379 int, argval)
3380{
3381 struct sock *sk = bpf_sock->sk;
3382 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
3383
3384 if (!sk_fullsock(sk))
3385 return -EINVAL;
3386
3387#ifdef CONFIG_INET
3388 if (val)
3389 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
3390
3391 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
3392#else
3393 return -EINVAL;
3394#endif
3395}
3396
3397static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
3398 .func = bpf_sock_ops_cb_flags_set,
3399 .gpl_only = false,
3400 .ret_type = RET_INTEGER,
3401 .arg1_type = ARG_PTR_TO_CTX,
3402 .arg2_type = ARG_ANYTHING,
3403};
3404
3330static const struct bpf_func_proto * 3405static const struct bpf_func_proto *
3331bpf_base_func_proto(enum bpf_func_id func_id) 3406bpf_base_func_proto(enum bpf_func_id func_id)
3332{ 3407{
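
bpf_sock_ops_cb_flags_set() lets a sock_ops program opt in to extra callbacks per connection; the helper returns whatever requested bits the kernel does not support, so 0 means everything was accepted. A hedged sketch using the RTO/retransmission flags from the same series:

SEC("sockops")
int enable_rtx_callbacks(struct bpf_sock_ops *skops)
{
	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		bpf_sock_ops_cb_flags_set(skops,
					  BPF_SOCK_OPS_RTO_CB_FLAG |
					  BPF_SOCK_OPS_RETRANS_CB_FLAG);
	return 1;
}
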
@@ -3459,6 +3534,8 @@ xdp_func_proto(enum bpf_func_id func_id)
3459 return &bpf_xdp_event_output_proto; 3534 return &bpf_xdp_event_output_proto;
3460 case BPF_FUNC_get_smp_processor_id: 3535 case BPF_FUNC_get_smp_processor_id:
3461 return &bpf_get_smp_processor_id_proto; 3536 return &bpf_get_smp_processor_id_proto;
3537 case BPF_FUNC_csum_diff:
3538 return &bpf_csum_diff_proto;
3462 case BPF_FUNC_xdp_adjust_head: 3539 case BPF_FUNC_xdp_adjust_head:
3463 return &bpf_xdp_adjust_head_proto; 3540 return &bpf_xdp_adjust_head_proto;
3464 case BPF_FUNC_xdp_adjust_meta: 3541 case BPF_FUNC_xdp_adjust_meta:
@@ -3507,6 +3584,8 @@ static const struct bpf_func_proto *
3507 return &bpf_setsockopt_proto; 3584 return &bpf_setsockopt_proto;
3508 case BPF_FUNC_getsockopt: 3585 case BPF_FUNC_getsockopt:
3509 return &bpf_getsockopt_proto; 3586 return &bpf_getsockopt_proto;
3587 case BPF_FUNC_sock_ops_cb_flags_set:
3588 return &bpf_sock_ops_cb_flags_set_proto;
3510 case BPF_FUNC_sock_map_update: 3589 case BPF_FUNC_sock_map_update:
3511 return &bpf_sock_map_update_proto; 3590 return &bpf_sock_map_update_proto;
3512 default: 3591 default:
@@ -3823,34 +3902,44 @@ void bpf_warn_invalid_xdp_action(u32 act)
3823} 3902}
3824EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 3903EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
3825 3904
3826static bool __is_valid_sock_ops_access(int off, int size) 3905static bool sock_ops_is_valid_access(int off, int size,
3906 enum bpf_access_type type,
3907 struct bpf_insn_access_aux *info)
3827{ 3908{
3909 const int size_default = sizeof(__u32);
3910
3828 if (off < 0 || off >= sizeof(struct bpf_sock_ops)) 3911 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
3829 return false; 3912 return false;
3913
3830 /* The verifier guarantees that size > 0. */ 3914 /* The verifier guarantees that size > 0. */
3831 if (off % size != 0) 3915 if (off % size != 0)
3832 return false; 3916 return false;
3833 if (size != sizeof(__u32))
3834 return false;
3835 3917
3836 return true;
3837}
3838
3839static bool sock_ops_is_valid_access(int off, int size,
3840 enum bpf_access_type type,
3841 struct bpf_insn_access_aux *info)
3842{
3843 if (type == BPF_WRITE) { 3918 if (type == BPF_WRITE) {
3844 switch (off) { 3919 switch (off) {
3845 case offsetof(struct bpf_sock_ops, op) ... 3920 case offsetof(struct bpf_sock_ops, reply):
3846 offsetof(struct bpf_sock_ops, replylong[3]): 3921 case offsetof(struct bpf_sock_ops, sk_txhash):
3922 if (size != size_default)
3923 return false;
3847 break; 3924 break;
3848 default: 3925 default:
3849 return false; 3926 return false;
3850 } 3927 }
3928 } else {
3929 switch (off) {
3930 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
3931 bytes_acked):
3932 if (size != sizeof(__u64))
3933 return false;
3934 break;
3935 default:
3936 if (size != size_default)
3937 return false;
3938 break;
3939 }
3851 } 3940 }
3852 3941
3853 return __is_valid_sock_ops_access(off, size); 3942 return true;
3854} 3943}
3855 3944
3856static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, 3945static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -4305,6 +4394,24 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4305 si->dst_reg, si->src_reg, 4394 si->dst_reg, si->src_reg,
4306 offsetof(struct xdp_buff, data_end)); 4395 offsetof(struct xdp_buff, data_end));
4307 break; 4396 break;
4397 case offsetof(struct xdp_md, ingress_ifindex):
4398 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4399 si->dst_reg, si->src_reg,
4400 offsetof(struct xdp_buff, rxq));
4401 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
4402 si->dst_reg, si->dst_reg,
4403 offsetof(struct xdp_rxq_info, dev));
4404 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4405 offsetof(struct net_device, ifindex));
4406 break;
4407 case offsetof(struct xdp_md, rx_queue_index):
4408 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4409 si->dst_reg, si->src_reg,
4410 offsetof(struct xdp_buff, rxq));
4411 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4412 offsetof(struct xdp_rxq_info,
4413 queue_index));
4414 break;
4308 } 4415 }
4309 4416
4310 return insn - insn_buf; 4417 return insn - insn_buf;
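
The two new cases expose xdp_md's ingress_ifindex and rx_queue_index to XDP programs, each rewritten into loads through xdp_buff->rxq. A minimal consumer sketch (the policy itself is made up):

SEC("xdp")
int xdp_rxq_demo(struct xdp_md *ctx)
{
	/* both loads are rewritten by xdp_convert_ctx_access() above */
	if (ctx->ingress_ifindex == 2 && ctx->rx_queue_index > 0)
		return XDP_DROP;
	return XDP_PASS;
}
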
@@ -4439,6 +4546,211 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
4439 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 4546 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4440 offsetof(struct sock_common, skc_num)); 4547 offsetof(struct sock_common, skc_num));
4441 break; 4548 break;
4549
4550 case offsetof(struct bpf_sock_ops, is_fullsock):
4551 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4552 struct bpf_sock_ops_kern,
4553 is_fullsock),
4554 si->dst_reg, si->src_reg,
4555 offsetof(struct bpf_sock_ops_kern,
4556 is_fullsock));
4557 break;
4558
4559 case offsetof(struct bpf_sock_ops, state):
4560 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
4561
4562 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4563 struct bpf_sock_ops_kern, sk),
4564 si->dst_reg, si->src_reg,
4565 offsetof(struct bpf_sock_ops_kern, sk));
4566 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
4567 offsetof(struct sock_common, skc_state));
4568 break;
4569
4570 case offsetof(struct bpf_sock_ops, rtt_min):
4571 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
4572 sizeof(struct minmax));
4573 BUILD_BUG_ON(sizeof(struct minmax) <
4574 sizeof(struct minmax_sample));
4575
4576 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4577 struct bpf_sock_ops_kern, sk),
4578 si->dst_reg, si->src_reg,
4579 offsetof(struct bpf_sock_ops_kern, sk));
4580 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4581 offsetof(struct tcp_sock, rtt_min) +
4582 FIELD_SIZEOF(struct minmax_sample, t));
4583 break;
4584
4585/* Helper macro for adding read access to tcp_sock or sock fields. */
4586#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
4587 do { \
4588 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
4589 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
4590 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
4591 struct bpf_sock_ops_kern, \
4592 is_fullsock), \
4593 si->dst_reg, si->src_reg, \
4594 offsetof(struct bpf_sock_ops_kern, \
4595 is_fullsock)); \
4596 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
4597 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
4598 struct bpf_sock_ops_kern, sk),\
4599 si->dst_reg, si->src_reg, \
4600 offsetof(struct bpf_sock_ops_kern, sk));\
4601 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
4602 OBJ_FIELD), \
4603 si->dst_reg, si->dst_reg, \
4604 offsetof(OBJ, OBJ_FIELD)); \
4605 } while (0)
4606
4607/* Helper macro for adding write access to tcp_sock or sock fields.
4608 * The macro is called with two registers, dst_reg which contains a pointer
4609 * to ctx (context) and src_reg which contains the value that should be
4610 * stored. However, we need an additional register since we cannot overwrite
4611 * dst_reg because it may be used later in the program.
 4612 * Instead we "borrow" one of the other registers. We first save its value
4613 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
4614 * it at the end of the macro.
4615 */
4616#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
4617 do { \
4618 int reg = BPF_REG_9; \
4619 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
4620 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
4621 if (si->dst_reg == reg || si->src_reg == reg) \
4622 reg--; \
4623 if (si->dst_reg == reg || si->src_reg == reg) \
4624 reg--; \
4625 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
4626 offsetof(struct bpf_sock_ops_kern, \
4627 temp)); \
4628 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
4629 struct bpf_sock_ops_kern, \
4630 is_fullsock), \
4631 reg, si->dst_reg, \
4632 offsetof(struct bpf_sock_ops_kern, \
4633 is_fullsock)); \
4634 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
4635 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
4636 struct bpf_sock_ops_kern, sk),\
4637 reg, si->dst_reg, \
4638 offsetof(struct bpf_sock_ops_kern, sk));\
4639 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
4640 reg, si->src_reg, \
4641 offsetof(OBJ, OBJ_FIELD)); \
4642 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
4643 offsetof(struct bpf_sock_ops_kern, \
4644 temp)); \
4645 } while (0)
4646
4647#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
4648 do { \
4649 if (TYPE == BPF_WRITE) \
4650 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
4651 else \
4652 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
4653 } while (0)
4654
4655 case offsetof(struct bpf_sock_ops, snd_cwnd):
4656 SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
4657 break;
4658
4659 case offsetof(struct bpf_sock_ops, srtt_us):
4660 SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
4661 break;
4662
4663 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
4664 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
4665 struct tcp_sock);
4666 break;
4667
4668 case offsetof(struct bpf_sock_ops, snd_ssthresh):
4669 SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
4670 break;
4671
4672 case offsetof(struct bpf_sock_ops, rcv_nxt):
4673 SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
4674 break;
4675
4676 case offsetof(struct bpf_sock_ops, snd_nxt):
4677 SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
4678 break;
4679
4680 case offsetof(struct bpf_sock_ops, snd_una):
4681 SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
4682 break;
4683
4684 case offsetof(struct bpf_sock_ops, mss_cache):
4685 SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
4686 break;
4687
4688 case offsetof(struct bpf_sock_ops, ecn_flags):
4689 SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
4690 break;
4691
4692 case offsetof(struct bpf_sock_ops, rate_delivered):
4693 SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
4694 struct tcp_sock);
4695 break;
4696
4697 case offsetof(struct bpf_sock_ops, rate_interval_us):
4698 SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
4699 struct tcp_sock);
4700 break;
4701
4702 case offsetof(struct bpf_sock_ops, packets_out):
4703 SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
4704 break;
4705
4706 case offsetof(struct bpf_sock_ops, retrans_out):
4707 SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
4708 break;
4709
4710 case offsetof(struct bpf_sock_ops, total_retrans):
4711 SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
4712 struct tcp_sock);
4713 break;
4714
4715 case offsetof(struct bpf_sock_ops, segs_in):
4716 SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
4717 break;
4718
4719 case offsetof(struct bpf_sock_ops, data_segs_in):
4720 SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
4721 break;
4722
4723 case offsetof(struct bpf_sock_ops, segs_out):
4724 SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
4725 break;
4726
4727 case offsetof(struct bpf_sock_ops, data_segs_out):
4728 SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
4729 struct tcp_sock);
4730 break;
4731
4732 case offsetof(struct bpf_sock_ops, lost_out):
4733 SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
4734 break;
4735
4736 case offsetof(struct bpf_sock_ops, sacked_out):
4737 SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
4738 break;
4739
4740 case offsetof(struct bpf_sock_ops, sk_txhash):
4741 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
4742 struct sock, type);
4743 break;
4744
4745 case offsetof(struct bpf_sock_ops, bytes_received):
4746 SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
4747 struct tcp_sock);
4748 break;
4749
4750 case offsetof(struct bpf_sock_ops, bytes_acked):
4751 SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
4752 break;
4753
4442 } 4754 }
4443 return insn - insn_buf; 4755 return insn - insn_buf;
4444} 4756}
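
Taken together with sock_ops_is_valid_access() above, these rewrites let a sock_ops program read TCP state straight off the context, provided it checks is_fullsock first (the generated is_fullsock guard otherwise yields 0) and uses an 8-byte load for the bytes_received/bytes_acked range. A sketch:

SEC("sockops")
int sample_tcp_state(struct bpf_sock_ops *skops)
{
	__u32 cwnd;
	__u64 acked;

	if (!skops->is_fullsock)
		return 1;

	cwnd  = skops->snd_cwnd;	/* rewritten to a tcp_sock load */
	acked = skops->bytes_acked;	/* 8-byte field, needs a u64 load */
	/* e.g. export cwnd/acked through a map here */
	return 1;
}
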
@@ -4475,6 +4787,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = {
4475}; 4787};
4476 4788
4477const struct bpf_prog_ops sk_filter_prog_ops = { 4789const struct bpf_prog_ops sk_filter_prog_ops = {
4790 .test_run = bpf_prog_test_run_skb,
4478}; 4791};
4479 4792
4480const struct bpf_verifier_ops tc_cls_act_verifier_ops = { 4793const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 544bddf08e13..559db9ea8d86 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -24,6 +24,7 @@
24#include <linux/tcp.h> 24#include <linux/tcp.h>
25#include <net/flow_dissector.h> 25#include <net/flow_dissector.h>
26#include <scsi/fc/fc_fcoe.h> 26#include <scsi/fc/fc_fcoe.h>
27#include <uapi/linux/batadv_packet.h>
27 28
28static void dissector_set_key(struct flow_dissector *flow_dissector, 29static void dissector_set_key(struct flow_dissector *flow_dissector,
29 enum flow_dissector_key_id key_id) 30 enum flow_dissector_key_id key_id)
@@ -133,10 +134,10 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
133 ctrl->addr_type = type; 134 ctrl->addr_type = type;
134} 135}
135 136
136static void 137void
137__skb_flow_dissect_tunnel_info(const struct sk_buff *skb, 138skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
138 struct flow_dissector *flow_dissector, 139 struct flow_dissector *flow_dissector,
139 void *target_container) 140 void *target_container)
140{ 141{
141 struct ip_tunnel_info *info; 142 struct ip_tunnel_info *info;
142 struct ip_tunnel_key *key; 143 struct ip_tunnel_key *key;
@@ -212,6 +213,7 @@ __skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
212 tp->dst = key->tp_dst; 213 tp->dst = key->tp_dst;
213 } 214 }
214} 215}
216EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
215 217
216static enum flow_dissect_ret 218static enum flow_dissect_ret
217__skb_flow_dissect_mpls(const struct sk_buff *skb, 219__skb_flow_dissect_mpls(const struct sk_buff *skb,
@@ -436,6 +438,57 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
436 return FLOW_DISSECT_RET_PROTO_AGAIN; 438 return FLOW_DISSECT_RET_PROTO_AGAIN;
437} 439}
438 440
441/**
442 * __skb_flow_dissect_batadv() - dissect batman-adv header
 443 * @skb: sk_buff with the batman-adv header
444 * @key_control: flow dissectors control key
445 * @data: raw buffer pointer to the packet, if NULL use skb->data
446 * @p_proto: pointer used to update the protocol to process next
447 * @p_nhoff: pointer used to update inner network header offset
448 * @hlen: packet header length
449 * @flags: any combination of FLOW_DISSECTOR_F_*
450 *
 451 * An attempt is made to dissect ETH_P_BATMAN packets. Only
 452 * &struct batadv_unicast packets are actually processed, because they contain an
 453 * inner ethernet header and are usually followed by the actual network header. This
454 * allows the flow dissector to continue processing the packet.
455 *
456 * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found,
457 * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation,
458 * otherwise FLOW_DISSECT_RET_OUT_BAD
459 */
460static enum flow_dissect_ret
461__skb_flow_dissect_batadv(const struct sk_buff *skb,
462 struct flow_dissector_key_control *key_control,
463 void *data, __be16 *p_proto, int *p_nhoff, int hlen,
464 unsigned int flags)
465{
466 struct {
467 struct batadv_unicast_packet batadv_unicast;
468 struct ethhdr eth;
469 } *hdr, _hdr;
470
471 hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen,
472 &_hdr);
473 if (!hdr)
474 return FLOW_DISSECT_RET_OUT_BAD;
475
476 if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION)
477 return FLOW_DISSECT_RET_OUT_BAD;
478
479 if (hdr->batadv_unicast.packet_type != BATADV_UNICAST)
480 return FLOW_DISSECT_RET_OUT_BAD;
481
482 *p_proto = hdr->eth.h_proto;
483 *p_nhoff += sizeof(*hdr);
484
485 key_control->flags |= FLOW_DIS_ENCAPSULATION;
486 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
487 return FLOW_DISSECT_RET_OUT_GOOD;
488
489 return FLOW_DISSECT_RET_PROTO_AGAIN;
490}
491
439static void 492static void
440__skb_flow_dissect_tcp(const struct sk_buff *skb, 493__skb_flow_dissect_tcp(const struct sk_buff *skb,
441 struct flow_dissector *flow_dissector, 494 struct flow_dissector *flow_dissector,
@@ -576,9 +629,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
576 FLOW_DISSECTOR_KEY_BASIC, 629 FLOW_DISSECTOR_KEY_BASIC,
577 target_container); 630 target_container);
578 631
579 __skb_flow_dissect_tunnel_info(skb, flow_dissector,
580 target_container);
581
582 if (dissector_uses_key(flow_dissector, 632 if (dissector_uses_key(flow_dissector,
583 FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 633 FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
584 struct ethhdr *eth = eth_hdr(skb); 634 struct ethhdr *eth = eth_hdr(skb);
@@ -817,6 +867,11 @@ proto_again:
817 nhoff, hlen); 867 nhoff, hlen);
818 break; 868 break;
819 869
870 case htons(ETH_P_BATMAN):
871 fdret = __skb_flow_dissect_batadv(skb, key_control, data,
872 &proto, &nhoff, hlen, flags);
873 break;
874
820 default: 875 default:
821 fdret = FLOW_DISSECT_RET_OUT_BAD; 876 fdret = FLOW_DISSECT_RET_OUT_BAD;
822 break; 877 break;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9834cfa21b21..0a3f88f08727 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -159,7 +159,11 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
159 est->intvl_log = intvl_log; 159 est->intvl_log = intvl_log;
160 est->cpu_bstats = cpu_bstats; 160 est->cpu_bstats = cpu_bstats;
161 161
162 if (stats_lock)
163 local_bh_disable();
162 est_fetch_counters(est, &b); 164 est_fetch_counters(est, &b);
165 if (stats_lock)
166 local_bh_enable();
163 est->last_bytes = b.bytes; 167 est->last_bytes = b.bytes;
164 est->last_packets = b.packets; 168 est->last_packets = b.packets;
165 old = rcu_dereference_protected(*rate_est, 1); 169 old = rcu_dereference_protected(*rate_est, 1);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 87f28557b329..b2b2323bdc84 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -252,10 +252,10 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
252 } 252 }
253} 253}
254 254
255static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, 255void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
256 const struct gnet_stats_queue __percpu *cpu, 256 const struct gnet_stats_queue __percpu *cpu,
257 const struct gnet_stats_queue *q, 257 const struct gnet_stats_queue *q,
258 __u32 qlen) 258 __u32 qlen)
259{ 259{
260 if (cpu) { 260 if (cpu) {
261 __gnet_stats_copy_queue_cpu(qstats, cpu); 261 __gnet_stats_copy_queue_cpu(qstats, cpu);
@@ -269,6 +269,7 @@ static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
269 269
270 qstats->qlen = qlen; 270 qstats->qlen = qlen;
271} 271}
272EXPORT_SYMBOL(__gnet_stats_copy_queue);
272 273
273/** 274/**
274 * gnet_stats_copy_queue - copy queue statistics into statistics TLV 275 * gnet_stats_copy_queue - copy queue statistics into statistics TLV
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 982861607f88..e38e641e98d5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
92 if (dev->ifindex != dev_get_iflink(dev)) 92 if (dev->ifindex != dev_get_iflink(dev))
93 return true; 93 return true;
94 94
95 if (dev->priv_flags & IFF_TEAM_PORT) 95 if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
96 return true; 96 return true;
97 97
98 return netif_carrier_ok(dev) && qdisc_tx_changing(dev); 98 return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7f831711b6e0..7b7a14abba28 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2862,7 +2862,6 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2862}; 2862};
2863 2863
2864static const struct file_operations neigh_stat_seq_fops = { 2864static const struct file_operations neigh_stat_seq_fops = {
2865 .owner = THIS_MODULE,
2866 .open = neigh_stat_seq_open, 2865 .open = neigh_stat_seq_open,
2867 .read = seq_read, 2866 .read = seq_read,
2868 .llseek = seq_lseek, 2867 .llseek = seq_lseek,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 615ccab55f38..e010bb800d7b 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -182,7 +182,6 @@ static int dev_seq_open(struct inode *inode, struct file *file)
182} 182}
183 183
184static const struct file_operations dev_seq_fops = { 184static const struct file_operations dev_seq_fops = {
185 .owner = THIS_MODULE,
186 .open = dev_seq_open, 185 .open = dev_seq_open,
187 .read = seq_read, 186 .read = seq_read,
188 .llseek = seq_lseek, 187 .llseek = seq_lseek,
@@ -202,7 +201,6 @@ static int softnet_seq_open(struct inode *inode, struct file *file)
202} 201}
203 202
204static const struct file_operations softnet_seq_fops = { 203static const struct file_operations softnet_seq_fops = {
205 .owner = THIS_MODULE,
206 .open = softnet_seq_open, 204 .open = softnet_seq_open,
207 .read = seq_read, 205 .read = seq_read,
208 .llseek = seq_lseek, 206 .llseek = seq_lseek,
@@ -306,7 +304,6 @@ static int ptype_seq_open(struct inode *inode, struct file *file)
306} 304}
307 305
308static const struct file_operations ptype_seq_fops = { 306static const struct file_operations ptype_seq_fops = {
309 .owner = THIS_MODULE,
310 .open = ptype_seq_open, 307 .open = ptype_seq_open,
311 .read = seq_read, 308 .read = seq_read,
312 .llseek = seq_lseek, 309 .llseek = seq_lseek,
@@ -387,7 +384,6 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file)
387} 384}
388 385
389static const struct file_operations dev_mc_seq_fops = { 386static const struct file_operations dev_mc_seq_fops = {
390 .owner = THIS_MODULE,
391 .open = dev_mc_seq_open, 387 .open = dev_mc_seq_open,
392 .read = seq_read, 388 .read = seq_read,
393 .llseek = seq_lseek, 389 .llseek = seq_lseek,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 799b75268291..60a5ad2c33ee 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -295,10 +295,31 @@ static ssize_t carrier_changes_show(struct device *dev,
295 struct net_device *netdev = to_net_dev(dev); 295 struct net_device *netdev = to_net_dev(dev);
296 296
297 return sprintf(buf, fmt_dec, 297 return sprintf(buf, fmt_dec,
298 atomic_read(&netdev->carrier_changes)); 298 atomic_read(&netdev->carrier_up_count) +
299 atomic_read(&netdev->carrier_down_count));
299} 300}
300static DEVICE_ATTR_RO(carrier_changes); 301static DEVICE_ATTR_RO(carrier_changes);
301 302
303static ssize_t carrier_up_count_show(struct device *dev,
304 struct device_attribute *attr,
305 char *buf)
306{
307 struct net_device *netdev = to_net_dev(dev);
308
309 return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
310}
311static DEVICE_ATTR_RO(carrier_up_count);
312
313static ssize_t carrier_down_count_show(struct device *dev,
314 struct device_attribute *attr,
315 char *buf)
316{
317 struct net_device *netdev = to_net_dev(dev);
318
319 return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
320}
321static DEVICE_ATTR_RO(carrier_down_count);
322
302/* read-write attributes */ 323/* read-write attributes */
303 324
304static int change_mtu(struct net_device *dev, unsigned long new_mtu) 325static int change_mtu(struct net_device *dev, unsigned long new_mtu)
@@ -325,29 +346,6 @@ static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
325} 346}
326NETDEVICE_SHOW_RW(flags, fmt_hex); 347NETDEVICE_SHOW_RW(flags, fmt_hex);
327 348
328static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
329{
330 unsigned int orig_len = dev->tx_queue_len;
331 int res;
332
333 if (new_len != (unsigned int)new_len)
334 return -ERANGE;
335
336 if (new_len != orig_len) {
337 dev->tx_queue_len = new_len;
338 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
339 res = notifier_to_errno(res);
340 if (res) {
341 netdev_err(dev,
342 "refused to change device tx_queue_len\n");
343 dev->tx_queue_len = orig_len;
344 return -EFAULT;
345 }
346 }
347
348 return 0;
349}
350
351static ssize_t tx_queue_len_store(struct device *dev, 349static ssize_t tx_queue_len_store(struct device *dev,
352 struct device_attribute *attr, 350 struct device_attribute *attr,
353 const char *buf, size_t len) 351 const char *buf, size_t len)
@@ -355,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev,
355 if (!capable(CAP_NET_ADMIN)) 353 if (!capable(CAP_NET_ADMIN))
356 return -EPERM; 354 return -EPERM;
357 355
358 return netdev_store(dev, attr, buf, len, change_tx_queue_len); 356 return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
359} 357}
360NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); 358NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
361 359
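
The removed sysfs-local helper is replaced by a consolidated dev_change_tx_queue_len() in net/core/dev.c, so the sysfs and netlink paths share one implementation. A hedged sketch of what the shared helper presumably looks like, based on the code deleted above (note the deleted copy returned -EFAULT on a notifier veto, where the notifier's own error code seems more appropriate):

int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
{
	unsigned int orig_len = dev->tx_queue_len;
	int res;

	if (new_len != (unsigned int)new_len)
		return -ERANGE;

	if (new_len != orig_len) {
		dev->tx_queue_len = new_len;
		res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
		res = notifier_to_errno(res);
		if (res) {
			netdev_err(dev, "refused to change device tx_queue_len\n");
			dev->tx_queue_len = orig_len;
			return res;
		}
	}

	return 0;
}
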
@@ -547,6 +545,8 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
547 &dev_attr_phys_port_name.attr, 545 &dev_attr_phys_port_name.attr,
548 &dev_attr_phys_switch_id.attr, 546 &dev_attr_phys_switch_id.attr,
549 &dev_attr_proto_down.attr, 547 &dev_attr_proto_down.attr,
548 &dev_attr_carrier_up_count.attr,
549 &dev_attr_carrier_down_count.attr,
550 NULL, 550 NULL,
551}; 551};
552ATTRIBUTE_GROUPS(net_class); 552ATTRIBUTE_GROUPS(net_class);
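
Both counters are also exported as standalone attributes, with the legacy carrier_changes kept as their sum. A small userspace sketch reading one of them (the interface name eth0 is assumed):

#include <stdio.h>

int main(void)
{
	unsigned long ups = 0;
	FILE *f = fopen("/sys/class/net/eth0/carrier_up_count", "r");

	if (f) {
		if (fscanf(f, "%lu", &ups) == 1)
			printf("carrier went up %lu times\n", ups);
		fclose(f);
	}
	return 0;
}
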
@@ -961,7 +961,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
961 while (--i >= new_num) { 961 while (--i >= new_num) {
962 struct kobject *kobj = &dev->_rx[i].kobj; 962 struct kobject *kobj = &dev->_rx[i].kobj;
963 963
964 if (!atomic_read(&dev_net(dev)->count)) 964 if (!refcount_read(&dev_net(dev)->count))
965 kobj->uevent_suppress = 1; 965 kobj->uevent_suppress = 1;
966 if (dev->sysfs_rx_queue_group) 966 if (dev->sysfs_rx_queue_group)
967 sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); 967 sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1367,7 +1367,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
1367 while (--i >= new_num) { 1367 while (--i >= new_num) {
1368 struct netdev_queue *queue = dev->_tx + i; 1368 struct netdev_queue *queue = dev->_tx + i;
1369 1369
1370 if (!atomic_read(&dev_net(dev)->count)) 1370 if (!refcount_read(&dev_net(dev)->count))
1371 queue->kobj.uevent_suppress = 1; 1371 queue->kobj.uevent_suppress = 1;
1372#ifdef CONFIG_BQL 1372#ifdef CONFIG_BQL
1373 sysfs_remove_group(&queue->kobj, &dql_group); 1373 sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
1558{ 1558{
1559 struct device *dev = &ndev->dev; 1559 struct device *dev = &ndev->dev;
1560 1560
1561 if (!atomic_read(&dev_net(ndev)->count)) 1561 if (!refcount_read(&dev_net(ndev)->count))
1562 dev_set_uevent_suppress(dev, 1); 1562 dev_set_uevent_suppress(dev, 1);
1563 1563
1564 kobject_get(&dev->kobj); 1564 kobject_get(&dev->kobj);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 60a71be75aea..3cad5f51afd3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -35,7 +35,7 @@ LIST_HEAD(net_namespace_list);
35EXPORT_SYMBOL_GPL(net_namespace_list); 35EXPORT_SYMBOL_GPL(net_namespace_list);
36 36
37struct net init_net = { 37struct net init_net = {
38 .count = ATOMIC_INIT(1), 38 .count = REFCOUNT_INIT(1),
39 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), 39 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
40}; 40};
41EXPORT_SYMBOL(init_net); 41EXPORT_SYMBOL(init_net);
@@ -221,17 +221,26 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
221 */ 221 */
222int peernet2id_alloc(struct net *net, struct net *peer) 222int peernet2id_alloc(struct net *net, struct net *peer)
223{ 223{
224 bool alloc; 224 bool alloc = false, alive = false;
225 int id; 225 int id;
226 226
227 if (atomic_read(&net->count) == 0) 227 if (refcount_read(&net->count) == 0)
228 return NETNSA_NSID_NOT_ASSIGNED; 228 return NETNSA_NSID_NOT_ASSIGNED;
229 spin_lock_bh(&net->nsid_lock); 229 spin_lock_bh(&net->nsid_lock);
230 alloc = atomic_read(&peer->count) == 0 ? false : true; 230 /*
231 * When peer is obtained from RCU lists, we may race with
 232 * its cleanup. Checking that it is still alive guarantees that
 233 * we never hash a peer back into net->netns_ids after it has
234 * just been idr_remove()'d from there in cleanup_net().
235 */
236 if (maybe_get_net(peer))
237 alive = alloc = true;
231 id = __peernet2id_alloc(net, peer, &alloc); 238 id = __peernet2id_alloc(net, peer, &alloc);
232 spin_unlock_bh(&net->nsid_lock); 239 spin_unlock_bh(&net->nsid_lock);
233 if (alloc && id >= 0) 240 if (alloc && id >= 0)
234 rtnl_net_notifyid(net, RTM_NEWNSID, id); 241 rtnl_net_notifyid(net, RTM_NEWNSID, id);
242 if (alive)
243 put_net(peer);
235 return id; 244 return id;
236} 245}
237EXPORT_SYMBOL_GPL(peernet2id_alloc); 246EXPORT_SYMBOL_GPL(peernet2id_alloc);
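
The fix hinges on maybe_get_net() refusing to take a reference once the count has dropped to zero; after the refcount_t conversion it is roughly (per include/net/net_namespace.h):

static inline struct net *maybe_get_net(struct net *net)
{
	/* fails exactly when cleanup_net() already owns the teardown */
	if (!refcount_inc_not_zero(&net->count))
		net = NULL;
	return net;
}
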
@@ -264,11 +273,9 @@ struct net *get_net_ns_by_id(struct net *net, int id)
264 return NULL; 273 return NULL;
265 274
266 rcu_read_lock(); 275 rcu_read_lock();
267 spin_lock_bh(&net->nsid_lock);
268 peer = idr_find(&net->netns_ids, id); 276 peer = idr_find(&net->netns_ids, id);
269 if (peer) 277 if (peer)
270 peer = maybe_get_net(peer); 278 peer = maybe_get_net(peer);
271 spin_unlock_bh(&net->nsid_lock);
272 rcu_read_unlock(); 279 rcu_read_unlock();
273 280
274 return peer; 281 return peer;
@@ -284,7 +291,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
284 int error = 0; 291 int error = 0;
285 LIST_HEAD(net_exit_list); 292 LIST_HEAD(net_exit_list);
286 293
287 atomic_set(&net->count, 1); 294 refcount_set(&net->count, 1);
288 refcount_set(&net->passive, 1); 295 refcount_set(&net->passive, 1);
289 net->dev_base_seq = 1; 296 net->dev_base_seq = 1;
290 net->user_ns = user_ns; 297 net->user_ns = user_ns;
@@ -432,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags,
432 return net; 439 return net;
433} 440}
434 441
442static void unhash_nsid(struct net *net, struct net *last)
443{
444 struct net *tmp;
445 /* This function is only called from cleanup_net() work,
446 * and this work is the only process, that may delete
447 * a net from net_namespace_list. So, when the below
448 * is executing, the list may only grow. Thus, we do not
449 * use for_each_net_rcu() or rtnl_lock().
450 */
451 for_each_net(tmp) {
452 int id;
453
454 spin_lock_bh(&tmp->nsid_lock);
455 id = __peernet2id(tmp, net);
456 if (id >= 0)
457 idr_remove(&tmp->netns_ids, id);
458 spin_unlock_bh(&tmp->nsid_lock);
459 if (id >= 0)
460 rtnl_net_notifyid(tmp, RTM_DELNSID, id);
461 if (tmp == last)
462 break;
463 }
464 spin_lock_bh(&net->nsid_lock);
465 idr_destroy(&net->netns_ids);
466 spin_unlock_bh(&net->nsid_lock);
467}
468
435static DEFINE_SPINLOCK(cleanup_list_lock); 469static DEFINE_SPINLOCK(cleanup_list_lock);
436static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 470static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
437 471
438static void cleanup_net(struct work_struct *work) 472static void cleanup_net(struct work_struct *work)
439{ 473{
440 const struct pernet_operations *ops; 474 const struct pernet_operations *ops;
441 struct net *net, *tmp; 475 struct net *net, *tmp, *last;
442 struct list_head net_kill_list; 476 struct list_head net_kill_list;
443 LIST_HEAD(net_exit_list); 477 LIST_HEAD(net_exit_list);
444 478
@@ -451,26 +485,25 @@ static void cleanup_net(struct work_struct *work)
451 485
452 /* Don't let anyone else find us. */ 486 /* Don't let anyone else find us. */
453 rtnl_lock(); 487 rtnl_lock();
454 list_for_each_entry(net, &net_kill_list, cleanup_list) { 488 list_for_each_entry(net, &net_kill_list, cleanup_list)
455 list_del_rcu(&net->list); 489 list_del_rcu(&net->list);
456 list_add_tail(&net->exit_list, &net_exit_list); 490 /* Cache last net. After we unlock rtnl, no one new net
457 for_each_net(tmp) { 491 * added to net_namespace_list can assign nsid pointer
458 int id; 492 * to a net from net_kill_list (see peernet2id_alloc()).
459 493 * So, we skip them in unhash_nsid().
460 spin_lock_bh(&tmp->nsid_lock); 494 *
461 id = __peernet2id(tmp, net); 495 * Note, that unhash_nsid() does not delete nsid links
462 if (id >= 0) 496 * between net_kill_list's nets, as they've already
463 idr_remove(&tmp->netns_ids, id); 497 * deleted from net_namespace_list. But, this would be
464 spin_unlock_bh(&tmp->nsid_lock); 498 * useless anyway, as netns_ids are destroyed there.
465 if (id >= 0) 499 */
466 rtnl_net_notifyid(tmp, RTM_DELNSID, id); 500 last = list_last_entry(&net_namespace_list, struct net, list);
467 } 501 rtnl_unlock();
468 spin_lock_bh(&net->nsid_lock);
469 idr_destroy(&net->netns_ids);
470 spin_unlock_bh(&net->nsid_lock);
471 502
503 list_for_each_entry(net, &net_kill_list, cleanup_list) {
504 unhash_nsid(net, last);
505 list_add_tail(&net->exit_list, &net_exit_list);
472 } 506 }
473 rtnl_unlock();
474 507
475 /* 508 /*
476 * Another CPU might be rcu-iterating the list, wait for it. 509 * Another CPU might be rcu-iterating the list, wait for it.
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f95a15086225..b8ab5c829511 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -184,25 +184,44 @@
184 184
185#define func_enter() pr_debug("entering %s\n", __func__); 185#define func_enter() pr_debug("entering %s\n", __func__);
186 186
187#define PKT_FLAGS \
188 pf(IPV6) /* Interface in IPV6 Mode */ \
189 pf(IPSRC_RND) /* IP-Src Random */ \
190 pf(IPDST_RND) /* IP-Dst Random */ \
191 pf(TXSIZE_RND) /* Transmit size is random */ \
192 pf(UDPSRC_RND) /* UDP-Src Random */ \
193 pf(UDPDST_RND) /* UDP-Dst Random */ \
194 pf(UDPCSUM) /* Include UDP checksum */ \
195 pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \
196 pf(MPLS_RND) /* Random MPLS labels */ \
197 pf(QUEUE_MAP_RND) /* queue map Random */ \
198 pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \
199 pf(FLOW_SEQ) /* Sequential flows */ \
200 pf(IPSEC) /* ipsec on for flows */ \
201 pf(MACSRC_RND) /* MAC-Src Random */ \
202 pf(MACDST_RND) /* MAC-Dst Random */ \
203 pf(VID_RND) /* Random VLAN ID */ \
204 pf(SVID_RND) /* Random SVLAN ID */ \
 205 pf(NODE) /* Node memory alloc */ \
206
207#define pf(flag) flag##_SHIFT,
208enum pkt_flags {
209 PKT_FLAGS
210};
211#undef pf
212
187/* Device flag bits */ 213/* Device flag bits */
188#define F_IPSRC_RND (1<<0) /* IP-Src Random */ 214#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT);
189#define F_IPDST_RND (1<<1) /* IP-Dst Random */ 215PKT_FLAGS
190#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ 216#undef pf
191#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ 217
192#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ 218#define pf(flag) __stringify(flag),
193#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ 219static char *pkt_flag_names[] = {
194#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ 220 PKT_FLAGS
195#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ 221};
196#define F_MPLS_RND (1<<8) /* Random MPLS labels */ 222#undef pf
197#define F_VID_RND (1<<9) /* Random VLAN ID */ 223
198#define F_SVID_RND (1<<10) /* Random SVLAN ID */ 224#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names)
199#define F_FLOW_SEQ (1<<11) /* Sequential flows */
200#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
201#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
202#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
203#define F_NODE (1<<15) /* Node memory alloc*/
204#define F_UDPCSUM (1<<16) /* Include UDP checksum */
205#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
206 225
207/* Thread control flag bits */ 226/* Thread control flag bits */
208#define T_STOP (1<<0) /* Stop run */ 227#define T_STOP (1<<0) /* Stop run */
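
The PKT_FLAGS list introduced above is an X-macro: a single flag table expanded three times to generate the bit positions, the F_* mask constants, and the printable names, so the three views can no longer drift apart. A self-contained illustration of the pattern:

#define COLOR_FLAGS \
	pf(RED)     \
	pf(GREEN)   \
	pf(BLUE)

#define pf(c) c##_SHIFT,
enum { COLOR_FLAGS };	/* RED_SHIFT = 0, GREEN_SHIFT = 1, BLUE_SHIFT = 2 */
#undef pf

#define pf(c) static const unsigned int F_##c = 1U << c##_SHIFT;
COLOR_FLAGS		/* F_RED = 1, F_GREEN = 2, F_BLUE = 4 */
#undef pf

#define pf(c) #c,
static const char *color_names[] = { COLOR_FLAGS };	/* "RED", "GREEN", "BLUE" */
#undef pf
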
@@ -399,7 +418,7 @@ struct pktgen_dev {
399 __u8 ipsmode; /* IPSEC mode (config) */ 418 __u8 ipsmode; /* IPSEC mode (config) */
400 __u8 ipsproto; /* IPSEC type (config) */ 419 __u8 ipsproto; /* IPSEC type (config) */
401 __u32 spi; 420 __u32 spi;
402 struct dst_entry dst; 421 struct xfrm_dst xdst;
403 struct dst_ops dstops; 422 struct dst_ops dstops;
404#endif 423#endif
405 char result[512]; 424 char result[512];
@@ -523,7 +542,6 @@ static int pgctrl_open(struct inode *inode, struct file *file)
523} 542}
524 543
525static const struct file_operations pktgen_fops = { 544static const struct file_operations pktgen_fops = {
526 .owner = THIS_MODULE,
527 .open = pgctrl_open, 545 .open = pgctrl_open,
528 .read = seq_read, 546 .read = seq_read,
529 .llseek = seq_lseek, 547 .llseek = seq_lseek,
@@ -535,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
535{ 553{
536 const struct pktgen_dev *pkt_dev = seq->private; 554 const struct pktgen_dev *pkt_dev = seq->private;
537 ktime_t stopped; 555 ktime_t stopped;
556 unsigned int i;
538 u64 idle; 557 u64 idle;
539 558
540 seq_printf(seq, 559 seq_printf(seq,
@@ -596,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
596 pkt_dev->src_mac_count, pkt_dev->dst_mac_count); 615 pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
597 616
598 if (pkt_dev->nr_labels) { 617 if (pkt_dev->nr_labels) {
599 unsigned int i;
600 seq_puts(seq, " mpls: "); 618 seq_puts(seq, " mpls: ");
601 for (i = 0; i < pkt_dev->nr_labels; i++) 619 for (i = 0; i < pkt_dev->nr_labels; i++)
602 seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), 620 seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
@@ -632,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
632 650
633 seq_puts(seq, " Flags: "); 651 seq_puts(seq, " Flags: ");
634 652
635 if (pkt_dev->flags & F_IPV6) 653 for (i = 0; i < NR_PKT_FLAGS; i++) {
 636 seq_puts(seq, "IPV6 "); 654 if (i == FLOW_SEQ_SHIFT)
637 655 if (!pkt_dev->cflows)
638 if (pkt_dev->flags & F_IPSRC_RND) 656 continue;
639 seq_puts(seq, "IPSRC_RND ");
640
641 if (pkt_dev->flags & F_IPDST_RND)
642 seq_puts(seq, "IPDST_RND ");
643
644 if (pkt_dev->flags & F_TXSIZE_RND)
645 seq_puts(seq, "TXSIZE_RND ");
646
647 if (pkt_dev->flags & F_UDPSRC_RND)
648 seq_puts(seq, "UDPSRC_RND ");
649
650 if (pkt_dev->flags & F_UDPDST_RND)
651 seq_puts(seq, "UDPDST_RND ");
652
653 if (pkt_dev->flags & F_UDPCSUM)
654 seq_puts(seq, "UDPCSUM ");
655
656 if (pkt_dev->flags & F_NO_TIMESTAMP)
657 seq_puts(seq, "NO_TIMESTAMP ");
658
659 if (pkt_dev->flags & F_MPLS_RND)
660 seq_puts(seq, "MPLS_RND ");
661
662 if (pkt_dev->flags & F_QUEUE_MAP_RND)
663 seq_puts(seq, "QUEUE_MAP_RND ");
664 657
665 if (pkt_dev->flags & F_QUEUE_MAP_CPU) 658 if (pkt_dev->flags & (1 << i))
666 seq_puts(seq, "QUEUE_MAP_CPU "); 659 seq_printf(seq, "%s ", pkt_flag_names[i]);
 667 660 else if (i == FLOW_SEQ_SHIFT)
668 if (pkt_dev->cflows) { 661 seq_puts(seq, "FLOW_RND ");
669 if (pkt_dev->flags & F_FLOW_SEQ)
670 seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
671 else
672 seq_puts(seq, "FLOW_RND ");
673 }
674 662
675#ifdef CONFIG_XFRM 663#ifdef CONFIG_XFRM
 676 if (pkt_dev->flags & F_IPSEC_ON) { 664 if (i == IPSEC_SHIFT && pkt_dev->spi)
677 seq_puts(seq, "IPSEC ");
678 if (pkt_dev->spi)
679 seq_printf(seq, "spi:%u", pkt_dev->spi); 665 seq_printf(seq, "spi:%u", pkt_dev->spi);
680 }
681#endif 666#endif
682 667 }
683 if (pkt_dev->flags & F_MACSRC_RND)
684 seq_puts(seq, "MACSRC_RND ");
685
686 if (pkt_dev->flags & F_MACDST_RND)
687 seq_puts(seq, "MACDST_RND ");
688
689 if (pkt_dev->flags & F_VID_RND)
690 seq_puts(seq, "VID_RND ");
691
692 if (pkt_dev->flags & F_SVID_RND)
693 seq_puts(seq, "SVID_RND ");
694
695 if (pkt_dev->flags & F_NODE)
696 seq_puts(seq, "NODE_ALLOC ");
697 668
698 seq_puts(seq, "\n"); 669 seq_puts(seq, "\n");
699 670
@@ -859,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
859 return i; 830 return i;
860} 831}
861 832
833static __u32 pktgen_read_flag(const char *f, bool *disable)
834{
835 __u32 i;
836
837 if (f[0] == '!') {
838 *disable = true;
839 f++;
840 }
841
842 for (i = 0; i < NR_PKT_FLAGS; i++) {
843 if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT)
844 continue;
845
 846 /* the IPV6 flag may only be disabled here, not set */
847 if (!*disable && i == IPV6_SHIFT)
848 continue;
849
850 if (strcmp(f, pkt_flag_names[i]) == 0)
851 return 1 << i;
852 }
853
854 if (strcmp(f, "FLOW_RND") == 0) {
855 *disable = !*disable;
856 return F_FLOW_SEQ;
857 }
858
859 return 0;
860}
861
862static ssize_t pktgen_if_write(struct file *file, 862static ssize_t pktgen_if_write(struct file *file,
863 const char __user * user_buffer, size_t count, 863 const char __user * user_buffer, size_t count,
864 loff_t * offset) 864 loff_t * offset)
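
pktgen_read_flag() above folds the old strcmp() ladder into one lookup over pkt_flag_names[], handling the '!' prefix in a single place and keeping FLOW_RND as a spelling for clearing FLOW_SEQ. Expected behaviour, sketched:

	bool disable = false;
	__u32 flag;

	flag = pktgen_read_flag("!UDPCSUM", &disable);
	/* flag == F_UDPCSUM, disable == true -> caller clears the bit */

	disable = false;
	flag = pktgen_read_flag("FLOW_RND", &disable);
	/* flag == F_FLOW_SEQ, disable == true: FLOW_RND == !FLOW_SEQ */
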
@@ -1216,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file,
1216 return count; 1216 return count;
1217 } 1217 }
1218 if (!strcmp(name, "flag")) { 1218 if (!strcmp(name, "flag")) {
1219 __u32 flag;
1219 char f[32]; 1220 char f[32];
1221 bool disable = false;
1222
1220 memset(f, 0, 32); 1223 memset(f, 0, 32);
1221 len = strn_len(&user_buffer[i], sizeof(f) - 1); 1224 len = strn_len(&user_buffer[i], sizeof(f) - 1);
1222 if (len < 0) 1225 if (len < 0)
@@ -1225,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file,
1225 if (copy_from_user(f, &user_buffer[i], len)) 1228 if (copy_from_user(f, &user_buffer[i], len))
1226 return -EFAULT; 1229 return -EFAULT;
1227 i += len; 1230 i += len;
1228 if (strcmp(f, "IPSRC_RND") == 0)
1229 pkt_dev->flags |= F_IPSRC_RND;
1230
1231 else if (strcmp(f, "!IPSRC_RND") == 0)
1232 pkt_dev->flags &= ~F_IPSRC_RND;
1233
1234 else if (strcmp(f, "TXSIZE_RND") == 0)
1235 pkt_dev->flags |= F_TXSIZE_RND;
1236
1237 else if (strcmp(f, "!TXSIZE_RND") == 0)
1238 pkt_dev->flags &= ~F_TXSIZE_RND;
1239
1240 else if (strcmp(f, "IPDST_RND") == 0)
1241 pkt_dev->flags |= F_IPDST_RND;
1242
1243 else if (strcmp(f, "!IPDST_RND") == 0)
1244 pkt_dev->flags &= ~F_IPDST_RND;
1245
1246 else if (strcmp(f, "UDPSRC_RND") == 0)
1247 pkt_dev->flags |= F_UDPSRC_RND;
1248
1249 else if (strcmp(f, "!UDPSRC_RND") == 0)
1250 pkt_dev->flags &= ~F_UDPSRC_RND;
1251
1252 else if (strcmp(f, "UDPDST_RND") == 0)
1253 pkt_dev->flags |= F_UDPDST_RND;
1254
1255 else if (strcmp(f, "!UDPDST_RND") == 0)
1256 pkt_dev->flags &= ~F_UDPDST_RND;
1257
1258 else if (strcmp(f, "MACSRC_RND") == 0)
1259 pkt_dev->flags |= F_MACSRC_RND;
1260
1261 else if (strcmp(f, "!MACSRC_RND") == 0)
1262 pkt_dev->flags &= ~F_MACSRC_RND;
1263
1264 else if (strcmp(f, "MACDST_RND") == 0)
1265 pkt_dev->flags |= F_MACDST_RND;
1266
1267 else if (strcmp(f, "!MACDST_RND") == 0)
1268 pkt_dev->flags &= ~F_MACDST_RND;
1269
1270 else if (strcmp(f, "MPLS_RND") == 0)
1271 pkt_dev->flags |= F_MPLS_RND;
1272
1273 else if (strcmp(f, "!MPLS_RND") == 0)
1274 pkt_dev->flags &= ~F_MPLS_RND;
1275 1231
1276 else if (strcmp(f, "VID_RND") == 0) 1232 flag = pktgen_read_flag(f, &disable);
1277 pkt_dev->flags |= F_VID_RND;
1278 1233
1279 else if (strcmp(f, "!VID_RND") == 0) 1234 if (flag) {
1280 pkt_dev->flags &= ~F_VID_RND; 1235 if (disable)
1281 1236 pkt_dev->flags &= ~flag;
1282 else if (strcmp(f, "SVID_RND") == 0) 1237 else
1283 pkt_dev->flags |= F_SVID_RND; 1238 pkt_dev->flags |= flag;
1284 1239 } else {
1285 else if (strcmp(f, "!SVID_RND") == 0)
1286 pkt_dev->flags &= ~F_SVID_RND;
1287
1288 else if (strcmp(f, "FLOW_SEQ") == 0)
1289 pkt_dev->flags |= F_FLOW_SEQ;
1290
1291 else if (strcmp(f, "QUEUE_MAP_RND") == 0)
1292 pkt_dev->flags |= F_QUEUE_MAP_RND;
1293
1294 else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
1295 pkt_dev->flags &= ~F_QUEUE_MAP_RND;
1296
1297 else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
1298 pkt_dev->flags |= F_QUEUE_MAP_CPU;
1299
1300 else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
1301 pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
1302#ifdef CONFIG_XFRM
1303 else if (strcmp(f, "IPSEC") == 0)
1304 pkt_dev->flags |= F_IPSEC_ON;
1305#endif
1306
1307 else if (strcmp(f, "!IPV6") == 0)
1308 pkt_dev->flags &= ~F_IPV6;
1309
1310 else if (strcmp(f, "NODE_ALLOC") == 0)
1311 pkt_dev->flags |= F_NODE;
1312
1313 else if (strcmp(f, "!NODE_ALLOC") == 0)
1314 pkt_dev->flags &= ~F_NODE;
1315
1316 else if (strcmp(f, "UDPCSUM") == 0)
1317 pkt_dev->flags |= F_UDPCSUM;
1318
1319 else if (strcmp(f, "!UDPCSUM") == 0)
1320 pkt_dev->flags &= ~F_UDPCSUM;
1321
1322 else if (strcmp(f, "NO_TIMESTAMP") == 0)
1323 pkt_dev->flags |= F_NO_TIMESTAMP;
1324
1325 else if (strcmp(f, "!NO_TIMESTAMP") == 0)
1326 pkt_dev->flags &= ~F_NO_TIMESTAMP;
1327
1328 else {
1329 sprintf(pg_result, 1240 sprintf(pg_result,
1330 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1241 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
1331 f, 1242 f,
@@ -1804,7 +1715,6 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
1804} 1715}
1805 1716
1806static const struct file_operations pktgen_if_fops = { 1717static const struct file_operations pktgen_if_fops = {
1807 .owner = THIS_MODULE,
1808 .open = pktgen_if_open, 1718 .open = pktgen_if_open,
1809 .read = seq_read, 1719 .read = seq_read,
1810 .llseek = seq_lseek, 1720 .llseek = seq_lseek,
@@ -1942,7 +1852,6 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
1942} 1852}
1943 1853
1944static const struct file_operations pktgen_thread_fops = { 1854static const struct file_operations pktgen_thread_fops = {
1945 .owner = THIS_MODULE,
1946 .open = pktgen_thread_open, 1855 .open = pktgen_thread_open,
1947 .read = seq_read, 1856 .read = seq_read,
1948 .llseek = seq_lseek, 1857 .llseek = seq_lseek,
@@ -2544,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2544 pkt_dev->flows[flow].cur_daddr = 2453 pkt_dev->flows[flow].cur_daddr =
2545 pkt_dev->cur_daddr; 2454 pkt_dev->cur_daddr;
2546#ifdef CONFIG_XFRM 2455#ifdef CONFIG_XFRM
2547 if (pkt_dev->flags & F_IPSEC_ON) 2456 if (pkt_dev->flags & F_IPSEC)
2548 get_ipsec_sa(pkt_dev, flow); 2457 get_ipsec_sa(pkt_dev, flow);
2549#endif 2458#endif
2550 pkt_dev->nflows++; 2459 pkt_dev->nflows++;
@@ -2609,7 +2518,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
2609 * supports both transport/tunnel mode + ESP/AH type. 2518 * supports both transport/tunnel mode + ESP/AH type.
2610 */ 2519 */
2611 if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0)) 2520 if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
2612 skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF; 2521 skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;
2613 2522
2614 rcu_read_lock_bh(); 2523 rcu_read_lock_bh();
2615 err = x->outer_mode->output(x, skb); 2524 err = x->outer_mode->output(x, skb);
@@ -2649,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
2649static int process_ipsec(struct pktgen_dev *pkt_dev, 2558static int process_ipsec(struct pktgen_dev *pkt_dev,
2650 struct sk_buff *skb, __be16 protocol) 2559 struct sk_buff *skb, __be16 protocol)
2651{ 2560{
2652 if (pkt_dev->flags & F_IPSEC_ON) { 2561 if (pkt_dev->flags & F_IPSEC) {
2653 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; 2562 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
2654 int nhead = 0; 2563 int nhead = 0;
2655 if (x) { 2564 if (x) {
@@ -3742,10 +3651,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3742	 * performance under such circumstances.	3651	 * performance under such circumstances.
3743 */ 3652 */
3744 pkt_dev->dstops.family = AF_INET; 3653 pkt_dev->dstops.family = AF_INET;
3745 pkt_dev->dst.dev = pkt_dev->odev; 3654 pkt_dev->xdst.u.dst.dev = pkt_dev->odev;
3746 dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false); 3655 dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false);
3747 pkt_dev->dst.child = &pkt_dev->dst; 3656 pkt_dev->xdst.child = &pkt_dev->xdst.u.dst;
3748 pkt_dev->dst.ops = &pkt_dev->dstops; 3657 pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops;
3749#endif 3658#endif
3750 3659
3751 return add_dev_to_thread(t, pkt_dev); 3660 return add_dev_to_thread(t, pkt_dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 778d7f03404a..bc290413a49d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,9 @@
62struct rtnl_link { 62struct rtnl_link {
63 rtnl_doit_func doit; 63 rtnl_doit_func doit;
64 rtnl_dumpit_func dumpit; 64 rtnl_dumpit_func dumpit;
65 struct module *owner;
65 unsigned int flags; 66 unsigned int flags;
67 struct rcu_head rcu;
66}; 68};
67 69
68static DEFINE_MUTEX(rtnl_mutex); 70static DEFINE_MUTEX(rtnl_mutex);
@@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void)
127EXPORT_SYMBOL(lockdep_rtnl_is_held); 129EXPORT_SYMBOL(lockdep_rtnl_is_held);
128#endif /* #ifdef CONFIG_PROVE_LOCKING */ 130#endif /* #ifdef CONFIG_PROVE_LOCKING */
129 131
130static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; 132static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
131static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
132 133
133static inline int rtm_msgindex(int msgtype) 134static inline int rtm_msgindex(int msgtype)
134{ 135{
@@ -144,72 +145,127 @@ static inline int rtm_msgindex(int msgtype)
144 return msgindex; 145 return msgindex;
145} 146}
146 147
147/** 148static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
148 * __rtnl_register - Register a rtnetlink message type
149 * @protocol: Protocol family or PF_UNSPEC
150 * @msgtype: rtnetlink message type
151 * @doit: Function pointer called for each request message
152 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
153 * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
154 *
155 * Registers the specified function pointers (at least one of them has
156 * to be non-NULL) to be called whenever a request message for the
157 * specified protocol family and message type is received.
158 *
159 * The special protocol family PF_UNSPEC may be used to define fallback
160 * function pointers for the case when no entry for the specific protocol
161 * family exists.
162 *
163 * Returns 0 on success or a negative error code.
164 */
165int __rtnl_register(int protocol, int msgtype,
166 rtnl_doit_func doit, rtnl_dumpit_func dumpit,
167 unsigned int flags)
168{ 149{
169 struct rtnl_link *tab; 150 struct rtnl_link **tab;
151
152 if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
153 protocol = PF_UNSPEC;
154
155 tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
156 if (!tab)
157 tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);
158
159 return tab[msgtype];
160}
161
162static int rtnl_register_internal(struct module *owner,
163 int protocol, int msgtype,
164 rtnl_doit_func doit, rtnl_dumpit_func dumpit,
165 unsigned int flags)
166{
167 struct rtnl_link *link, *old;
168 struct rtnl_link __rcu **tab;
170 int msgindex; 169 int msgindex;
170 int ret = -ENOBUFS;
171 171
172 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); 172 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
173 msgindex = rtm_msgindex(msgtype); 173 msgindex = rtm_msgindex(msgtype);
174 174
175 tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]); 175 rtnl_lock();
176 tab = rtnl_msg_handlers[protocol];
176 if (tab == NULL) { 177 if (tab == NULL) {
177 tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL); 178 tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
178 if (tab == NULL) 179 if (!tab)
179 return -ENOBUFS; 180 goto unlock;
180 181
182 /* ensures we see the 0 stores */
181 rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); 183 rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
182 } 184 }
183 185
186 old = rtnl_dereference(tab[msgindex]);
187 if (old) {
188 link = kmemdup(old, sizeof(*old), GFP_KERNEL);
189 if (!link)
190 goto unlock;
191 } else {
192 link = kzalloc(sizeof(*link), GFP_KERNEL);
193 if (!link)
194 goto unlock;
195 }
196
197 WARN_ON(link->owner && link->owner != owner);
198 link->owner = owner;
199
200 WARN_ON(doit && link->doit && link->doit != doit);
184 if (doit) 201 if (doit)
185 tab[msgindex].doit = doit; 202 link->doit = doit;
203 WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
186 if (dumpit) 204 if (dumpit)
187 tab[msgindex].dumpit = dumpit; 205 link->dumpit = dumpit;
188 tab[msgindex].flags |= flags;
189 206
190 return 0; 207 link->flags |= flags;
208
209 /* publish protocol:msgtype */
210 rcu_assign_pointer(tab[msgindex], link);
211 ret = 0;
212 if (old)
213 kfree_rcu(old, rcu);
214unlock:
215 rtnl_unlock();
216 return ret;
217}
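rtnl_register_internal() never modifies a published handler entry in place: it duplicates the existing entry (or allocates a fresh one), mutates the private copy, publishes it with rcu_assign_pointer(), and defers freeing the old copy with kfree_rcu() so concurrent RCU readers never observe a half-updated struct. A condensed sketch of that copy/publish pattern under an RTNL-holding writer; struct entry and my_table are illustrative, not part of this patch:

struct entry {
	int value;
	struct rcu_head rcu;
};

static struct entry __rcu *my_table[16];

/* Caller holds rtnl_lock(), serializing writers. */
static int update_entry(int idx, int value)
{
	struct entry *new, *old;

	old = rtnl_dereference(my_table[idx]);
	new = old ? kmemdup(old, sizeof(*old), GFP_KERNEL)
		  : kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->value = value;			/* mutate the private copy */
	rcu_assign_pointer(my_table[idx], new);	/* publish atomically */
	if (old)
		kfree_rcu(old, rcu);		/* free after a grace period */
	return 0;
}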
218
219/**
220 * rtnl_register_module - Register a rtnetlink message type
221 *
222 * @owner: module registering the hook (THIS_MODULE)
223 * @protocol: Protocol family or PF_UNSPEC
224 * @msgtype: rtnetlink message type
225 * @doit: Function pointer called for each request message
226 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
227 * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
228 *
229 * Like rtnl_register, but for use by removable modules.
230 */
231int rtnl_register_module(struct module *owner,
232 int protocol, int msgtype,
233 rtnl_doit_func doit, rtnl_dumpit_func dumpit,
234 unsigned int flags)
235{
236 return rtnl_register_internal(owner, protocol, msgtype,
237 doit, dumpit, flags);
191} 238}
192EXPORT_SYMBOL_GPL(__rtnl_register); 239EXPORT_SYMBOL_GPL(rtnl_register_module);
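A hypothetical caller in a removable module passes THIS_MODULE so rtnetlink can pin it for the duration of a doit call or an in-flight dump; the PF_BRIDGE/RTM_GETLINK pair and handler below are illustrative only:

static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	return 0;	/* handle the request */
}

static int __init my_init(void)
{
	/* may sleep; rtnl_register_internal() takes rtnl_lock() */
	return rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETLINK,
				    my_doit, NULL, 0);
}

static void __exit my_exit(void)
{
	rtnl_unregister(PF_BRIDGE, RTM_GETLINK);
}

module_init(my_init);
module_exit(my_exit);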
193 240
194/** 241/**
195 * rtnl_register - Register a rtnetlink message type 242 * rtnl_register - Register a rtnetlink message type
243 * @protocol: Protocol family or PF_UNSPEC
244 * @msgtype: rtnetlink message type
245 * @doit: Function pointer called for each request message
246 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
247 * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
248 *
249 * Registers the specified function pointers (at least one of them has
250 * to be non-NULL) to be called whenever a request message for the
251 * specified protocol family and message type is received.
196 * 252 *
197 * Identical to __rtnl_register() but panics on failure. This is useful 253 * The special protocol family PF_UNSPEC may be used to define fallback
198 * as failure of this function is very unlikely, it can only happen due 254 * function pointers for the case when no entry for the specific protocol
199 * to lack of memory when allocating the chain to store all message 255 * family exists.
200 * handlers for a protocol. Meant for use in init functions where lack
201 * of memory implies no sense in continuing.
202 */ 256 */
203void rtnl_register(int protocol, int msgtype, 257void rtnl_register(int protocol, int msgtype,
204 rtnl_doit_func doit, rtnl_dumpit_func dumpit, 258 rtnl_doit_func doit, rtnl_dumpit_func dumpit,
205 unsigned int flags) 259 unsigned int flags)
206{ 260{
207 if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0) 261 int err;
208 panic("Unable to register rtnetlink message handler, " 262
209 "protocol = %d, message type = %d\n", 263 err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
210 protocol, msgtype); 264 flags);
265 if (err)
266 pr_err("Unable to register rtnetlink message handler, "
267 "protocol = %d, message type = %d\n", protocol, msgtype);
211} 268}
212EXPORT_SYMBOL_GPL(rtnl_register);
213 269
214/** 270/**
215 * rtnl_unregister - Unregister a rtnetlink message type 271 * rtnl_unregister - Unregister a rtnetlink message type
@@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register);
220 */ 276 */
221int rtnl_unregister(int protocol, int msgtype) 277int rtnl_unregister(int protocol, int msgtype)
222{ 278{
223 struct rtnl_link *handlers; 279 struct rtnl_link **tab, *link;
224 int msgindex; 280 int msgindex;
225 281
226 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); 282 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
227 msgindex = rtm_msgindex(msgtype); 283 msgindex = rtm_msgindex(msgtype);
228 284
229 rtnl_lock(); 285 rtnl_lock();
230 handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); 286 tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
231 if (!handlers) { 287 if (!tab) {
232 rtnl_unlock(); 288 rtnl_unlock();
233 return -ENOENT; 289 return -ENOENT;
234 } 290 }
235 291
236 handlers[msgindex].doit = NULL; 292 link = tab[msgindex];
237 handlers[msgindex].dumpit = NULL; 293 rcu_assign_pointer(tab[msgindex], NULL);
238 handlers[msgindex].flags = 0;
239 rtnl_unlock(); 294 rtnl_unlock();
240 295
296 kfree_rcu(link, rcu);
297
241 return 0; 298 return 0;
242} 299}
243EXPORT_SYMBOL_GPL(rtnl_unregister); 300EXPORT_SYMBOL_GPL(rtnl_unregister);
@@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
251 */ 308 */
252void rtnl_unregister_all(int protocol) 309void rtnl_unregister_all(int protocol)
253{ 310{
254 struct rtnl_link *handlers; 311 struct rtnl_link **tab, *link;
312 int msgindex;
255 313
256 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); 314 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
257 315
258 rtnl_lock(); 316 rtnl_lock();
259 handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); 317 tab = rtnl_msg_handlers[protocol];
260 RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); 318 RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
319 for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
320 link = tab[msgindex];
321 if (!link)
322 continue;
323
324 rcu_assign_pointer(tab[msgindex], NULL);
325 kfree_rcu(link, rcu);
326 }
261 rtnl_unlock(); 327 rtnl_unlock();
262 328
263 synchronize_net(); 329 synchronize_net();
264 330
265 while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1) 331 kfree(tab);
266 schedule();
267 kfree(handlers);
268} 332}
269EXPORT_SYMBOL_GPL(rtnl_unregister_all); 333EXPORT_SYMBOL_GPL(rtnl_unregister_all);
270 334
@@ -840,6 +904,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
840 nla_total_size_64bit(sizeof(__u64)) + 904 nla_total_size_64bit(sizeof(__u64)) +
841 /* IFLA_VF_STATS_MULTICAST */ 905 /* IFLA_VF_STATS_MULTICAST */
842 nla_total_size_64bit(sizeof(__u64)) + 906 nla_total_size_64bit(sizeof(__u64)) +
907 /* IFLA_VF_STATS_RX_DROPPED */
908 nla_total_size_64bit(sizeof(__u64)) +
909 /* IFLA_VF_STATS_TX_DROPPED */
910 nla_total_size_64bit(sizeof(__u64)) +
843 nla_total_size(sizeof(struct ifla_vf_trust))); 911 nla_total_size(sizeof(struct ifla_vf_trust)));
844 return size; 912 return size;
845 } else 913 } else
@@ -920,8 +988,11 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
920 + rtnl_xdp_size() /* IFLA_XDP */ 988 + rtnl_xdp_size() /* IFLA_XDP */
921 + nla_total_size(4) /* IFLA_EVENT */ 989 + nla_total_size(4) /* IFLA_EVENT */
922 + nla_total_size(4) /* IFLA_NEW_NETNSID */ 990 + nla_total_size(4) /* IFLA_NEW_NETNSID */
991 + nla_total_size(4) /* IFLA_NEW_IFINDEX */
923 + nla_total_size(1) /* IFLA_PROTO_DOWN */ 992 + nla_total_size(1) /* IFLA_PROTO_DOWN */
924 + nla_total_size(4) /* IFLA_IF_NETNSID */ 993 + nla_total_size(4) /* IFLA_IF_NETNSID */
994 + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
995 + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
925 + 0; 996 + 0;
926} 997}
927 998
@@ -1194,7 +1265,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
1194 nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, 1265 nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
1195 vf_stats.broadcast, IFLA_VF_STATS_PAD) || 1266 vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
1196 nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, 1267 nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
1197 vf_stats.multicast, IFLA_VF_STATS_PAD)) { 1268 vf_stats.multicast, IFLA_VF_STATS_PAD) ||
1269 nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
1270 vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
1271 nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
1272 vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
1198 nla_nest_cancel(skb, vfstats); 1273 nla_nest_cancel(skb, vfstats);
1199 goto nla_put_vf_failure; 1274 goto nla_put_vf_failure;
1200 } 1275 }
@@ -1261,6 +1336,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
1261{ 1336{
1262 const struct net_device_ops *ops = dev->netdev_ops; 1337 const struct net_device_ops *ops = dev->netdev_ops;
1263 const struct bpf_prog *generic_xdp_prog; 1338 const struct bpf_prog *generic_xdp_prog;
1339 struct netdev_bpf xdp;
1264 1340
1265 ASSERT_RTNL(); 1341 ASSERT_RTNL();
1266 1342
@@ -1273,7 +1349,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
1273 if (!ops->ndo_bpf) 1349 if (!ops->ndo_bpf)
1274 return XDP_ATTACHED_NONE; 1350 return XDP_ATTACHED_NONE;
1275 1351
1276 return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id); 1352 __dev_xdp_query(dev, ops->ndo_bpf, &xdp);
1353 *prog_id = xdp.prog_id;
1354
1355 return xdp.prog_attached;
1277} 1356}
1278 1357
1279static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) 1358static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1433,7 +1512,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
1433 struct net_device *dev, struct net *src_net, 1512 struct net_device *dev, struct net *src_net,
1434 int type, u32 pid, u32 seq, u32 change, 1513 int type, u32 pid, u32 seq, u32 change,
1435 unsigned int flags, u32 ext_filter_mask, 1514 unsigned int flags, u32 ext_filter_mask,
1436 u32 event, int *new_nsid, int tgt_netnsid) 1515 u32 event, int *new_nsid, int new_ifindex,
1516 int tgt_netnsid)
1437{ 1517{
1438 struct ifinfomsg *ifm; 1518 struct ifinfomsg *ifm;
1439 struct nlmsghdr *nlh; 1519 struct nlmsghdr *nlh;
@@ -1475,8 +1555,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
1475 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || 1555 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
1476 nla_put_ifalias(skb, dev) || 1556 nla_put_ifalias(skb, dev) ||
1477 nla_put_u32(skb, IFLA_CARRIER_CHANGES, 1557 nla_put_u32(skb, IFLA_CARRIER_CHANGES,
1478 atomic_read(&dev->carrier_changes)) || 1558 atomic_read(&dev->carrier_up_count) +
1479 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) 1559 atomic_read(&dev->carrier_down_count)) ||
1560 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
1561 nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
1562 atomic_read(&dev->carrier_up_count)) ||
1563 nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
1564 atomic_read(&dev->carrier_down_count)))
1480 goto nla_put_failure; 1565 goto nla_put_failure;
1481 1566
1482 if (event != IFLA_EVENT_NONE) { 1567 if (event != IFLA_EVENT_NONE) {
@@ -1525,6 +1610,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
1525 if (new_nsid && 1610 if (new_nsid &&
1526 nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) 1611 nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
1527 goto nla_put_failure; 1612 goto nla_put_failure;
1613 if (new_ifindex &&
1614 nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
1615 goto nla_put_failure;
1616
1528 1617
1529 rcu_read_lock(); 1618 rcu_read_lock();
1530 if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) 1619 if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
@@ -1569,6 +1658,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1569 [IFLA_PROMISCUITY] = { .type = NLA_U32 }, 1658 [IFLA_PROMISCUITY] = { .type = NLA_U32 },
1570 [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, 1659 [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
1571 [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, 1660 [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
1661 [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
1662 [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
1572 [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, 1663 [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
1573 [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ 1664 [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
1574 [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, 1665 [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
@@ -1578,6 +1669,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1578 [IFLA_EVENT] = { .type = NLA_U32 }, 1669 [IFLA_EVENT] = { .type = NLA_U32 },
1579 [IFLA_GROUP] = { .type = NLA_U32 }, 1670 [IFLA_GROUP] = { .type = NLA_U32 },
1580 [IFLA_IF_NETNSID] = { .type = NLA_S32 }, 1671 [IFLA_IF_NETNSID] = { .type = NLA_S32 },
1672 [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
1673 [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
1581}; 1674};
1582 1675
1583static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 1676static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1766,7 +1859,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1766 NETLINK_CB(cb->skb).portid, 1859 NETLINK_CB(cb->skb).portid,
1767 cb->nlh->nlmsg_seq, 0, 1860 cb->nlh->nlmsg_seq, 0,
1768 flags, 1861 flags,
1769 ext_filter_mask, 0, NULL, 1862 ext_filter_mask, 0, NULL, 0,
1770 netnsid); 1863 netnsid);
1771 1864
1772 if (err < 0) { 1865 if (err < 0) {
@@ -1815,6 +1908,81 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
1815} 1908}
1816EXPORT_SYMBOL(rtnl_link_get_net); 1909EXPORT_SYMBOL(rtnl_link_get_net);
1817 1910
1911/* Figure out which network namespace we are talking about by
1912 * examining the link attributes in the following order:
1913 *
1914 * 1. IFLA_NET_NS_PID
1915 * 2. IFLA_NET_NS_FD
1916 * 3. IFLA_IF_NETNSID
1917 */
1918static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net,
1919 struct nlattr *tb[])
1920{
1921 struct net *net;
1922
1923 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])
1924 return rtnl_link_get_net(src_net, tb);
1925
1926 if (!tb[IFLA_IF_NETNSID])
1927 return get_net(src_net);
1928
1929 net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_IF_NETNSID]));
1930 if (!net)
1931 return ERR_PTR(-EINVAL);
1932
1933 return net;
1934}
1935
1936static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb,
1937 struct net *src_net,
1938 struct nlattr *tb[], int cap)
1939{
1940 struct net *net;
1941
1942 net = rtnl_link_get_net_by_nlattr(src_net, tb);
1943 if (IS_ERR(net))
1944 return net;
1945
1946 if (!netlink_ns_capable(skb, net->user_ns, cap)) {
1947 put_net(net);
1948 return ERR_PTR(-EPERM);
1949 }
1950
1951 return net;
1952}
1953
1954/* Verify that rtnetlink requests do not pass additional properties
1955 * potentially referring to different network namespaces.
1956 */
1957static int rtnl_ensure_unique_netns(struct nlattr *tb[],
1958 struct netlink_ext_ack *extack,
1959 bool netns_id_only)
1960{
1961
1962 if (netns_id_only) {
1963 if (!tb[IFLA_NET_NS_PID] && !tb[IFLA_NET_NS_FD])
1964 return 0;
1965
1966 NL_SET_ERR_MSG(extack, "specified netns attribute not supported");
1967 return -EOPNOTSUPP;
1968 }
1969
1970 if (tb[IFLA_IF_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]))
1971 goto invalid_attr;
1972
1973 if (tb[IFLA_NET_NS_PID] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_FD]))
1974 goto invalid_attr;
1975
1976 if (tb[IFLA_NET_NS_FD] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_PID]))
1977 goto invalid_attr;
1978
1979 return 0;
1980
1981invalid_attr:
1982 NL_SET_ERR_MSG(extack, "multiple netns identifying attributes specified");
1983 return -EINVAL;
1984}
1985
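The three goto checks in rtnl_ensure_unique_netns() amount to requiring that at most one namespace-identifying attribute is present. An equivalent counting formulation, for illustration only (not part of the patch):

static bool netns_attrs_unique(struct nlattr *tb[])
{
	int n = 0;

	n += !!tb[IFLA_NET_NS_PID];
	n += !!tb[IFLA_NET_NS_FD];
	n += !!tb[IFLA_IF_NETNSID];
	return n <= 1;	/* two or more identifying attributes is -EINVAL */
}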
1818static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) 1986static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
1819{ 1987{
1820 if (dev) { 1988 if (dev) {
@@ -2077,17 +2245,14 @@ static int do_setlink(const struct sk_buff *skb,
2077 const struct net_device_ops *ops = dev->netdev_ops; 2245 const struct net_device_ops *ops = dev->netdev_ops;
2078 int err; 2246 int err;
2079 2247
2080 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { 2248 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) {
2081 struct net *net = rtnl_link_get_net(dev_net(dev), tb); 2249 struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
2250 tb, CAP_NET_ADMIN);
2082 if (IS_ERR(net)) { 2251 if (IS_ERR(net)) {
2083 err = PTR_ERR(net); 2252 err = PTR_ERR(net);
2084 goto errout; 2253 goto errout;
2085 } 2254 }
2086 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { 2255
2087 put_net(net);
2088 err = -EPERM;
2089 goto errout;
2090 }
2091 err = dev_change_net_namespace(dev, net, ifname); 2256 err = dev_change_net_namespace(dev, net, ifname);
2092 put_net(net); 2257 put_net(net);
2093 if (err) 2258 if (err)
@@ -2204,17 +2369,37 @@ static int do_setlink(const struct sk_buff *skb,
2204 2369
2205 if (tb[IFLA_TXQLEN]) { 2370 if (tb[IFLA_TXQLEN]) {
2206 unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); 2371 unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
2207 unsigned int orig_len = dev->tx_queue_len; 2372
2208 2373 err = dev_change_tx_queue_len(dev, value);
2209 if (dev->tx_queue_len ^ value) { 2374 if (err)
2210 dev->tx_queue_len = value; 2375 goto errout;
2211 err = call_netdevice_notifiers( 2376 status |= DO_SETLINK_MODIFIED;
2212 NETDEV_CHANGE_TX_QUEUE_LEN, dev); 2377 }
2213 err = notifier_to_errno(err); 2378
2214 if (err) { 2379 if (tb[IFLA_GSO_MAX_SIZE]) {
2215 dev->tx_queue_len = orig_len; 2380 u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
2216 goto errout; 2381
2217 } 2382 if (max_size > GSO_MAX_SIZE) {
2383 err = -EINVAL;
2384 goto errout;
2385 }
2386
2387 if (dev->gso_max_size ^ max_size) {
2388 netif_set_gso_max_size(dev, max_size);
2389 status |= DO_SETLINK_MODIFIED;
2390 }
2391 }
2392
2393 if (tb[IFLA_GSO_MAX_SEGS]) {
2394 u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
2395
2396 if (max_segs > GSO_MAX_SEGS) {
2397 err = -EINVAL;
2398 goto errout;
2399 }
2400
2401 if (dev->gso_max_segs ^ max_segs) {
2402 dev->gso_max_segs = max_segs;
2218 status |= DO_SETLINK_MODIFIED; 2403 status |= DO_SETLINK_MODIFIED;
2219 } 2404 }
2220 } 2405 }
@@ -2400,8 +2585,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2400 if (err < 0) 2585 if (err < 0)
2401 goto errout; 2586 goto errout;
2402 2587
2403 if (tb[IFLA_IF_NETNSID]) 2588 err = rtnl_ensure_unique_netns(tb, extack, false);
2404 return -EOPNOTSUPP; 2589 if (err < 0)
2590 goto errout;
2405 2591
2406 if (tb[IFLA_IFNAME]) 2592 if (tb[IFLA_IFNAME])
2407 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2593 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -2487,36 +2673,57 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
2487 struct netlink_ext_ack *extack) 2673 struct netlink_ext_ack *extack)
2488{ 2674{
2489 struct net *net = sock_net(skb->sk); 2675 struct net *net = sock_net(skb->sk);
2490 struct net_device *dev; 2676 struct net *tgt_net = net;
2677 struct net_device *dev = NULL;
2491 struct ifinfomsg *ifm; 2678 struct ifinfomsg *ifm;
2492 char ifname[IFNAMSIZ]; 2679 char ifname[IFNAMSIZ];
2493 struct nlattr *tb[IFLA_MAX+1]; 2680 struct nlattr *tb[IFLA_MAX+1];
2494 int err; 2681 int err;
2682 int netnsid = -1;
2495 2683
2496 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); 2684 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
2497 if (err < 0) 2685 if (err < 0)
2498 return err; 2686 return err;
2499 2687
2500 if (tb[IFLA_IF_NETNSID]) 2688 err = rtnl_ensure_unique_netns(tb, extack, true);
2501 return -EOPNOTSUPP; 2689 if (err < 0)
2690 return err;
2502 2691
2503 if (tb[IFLA_IFNAME]) 2692 if (tb[IFLA_IFNAME])
2504 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2693 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
2505 2694
2695 if (tb[IFLA_IF_NETNSID]) {
2696 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
2697 tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
2698 if (IS_ERR(tgt_net))
2699 return PTR_ERR(tgt_net);
2700 }
2701
2702 err = -EINVAL;
2506 ifm = nlmsg_data(nlh); 2703 ifm = nlmsg_data(nlh);
2507 if (ifm->ifi_index > 0) 2704 if (ifm->ifi_index > 0)
2508 dev = __dev_get_by_index(net, ifm->ifi_index); 2705 dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
2509 else if (tb[IFLA_IFNAME]) 2706 else if (tb[IFLA_IFNAME])
2510 dev = __dev_get_by_name(net, ifname); 2707 dev = __dev_get_by_name(tgt_net, ifname);
2511 else if (tb[IFLA_GROUP]) 2708 else if (tb[IFLA_GROUP])
2512 return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP])); 2709 err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
2513 else 2710 else
2514 return -EINVAL; 2711 goto out;
2515 2712
2516 if (!dev) 2713 if (!dev) {
2517 return -ENODEV; 2714 if (tb[IFLA_IFNAME] || ifm->ifi_index > 0)
2715 err = -ENODEV;
2716
2717 goto out;
2718 }
2719
2720 err = rtnl_delete_link(dev);
2721
2722out:
2723 if (netnsid >= 0)
2724 put_net(tgt_net);
2518 2725
2519 return rtnl_delete_link(dev); 2726 return err;
2520} 2727}
2521 2728
2522int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) 2729int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
@@ -2583,6 +2790,10 @@ struct net_device *rtnl_create_link(struct net *net,
2583 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); 2790 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
2584 if (tb[IFLA_GROUP]) 2791 if (tb[IFLA_GROUP])
2585 dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); 2792 dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
2793 if (tb[IFLA_GSO_MAX_SIZE])
2794 netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
2795 if (tb[IFLA_GSO_MAX_SEGS])
2796 dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
2586 2797
2587 return dev; 2798 return dev;
2588} 2799}
@@ -2631,8 +2842,9 @@ replay:
2631 if (err < 0) 2842 if (err < 0)
2632 return err; 2843 return err;
2633 2844
2634 if (tb[IFLA_IF_NETNSID]) 2845 err = rtnl_ensure_unique_netns(tb, extack, false);
2635 return -EOPNOTSUPP; 2846 if (err < 0)
2847 return err;
2636 2848
2637 if (tb[IFLA_IFNAME]) 2849 if (tb[IFLA_IFNAME])
2638 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2850 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -2781,14 +2993,10 @@ replay:
2781 name_assign_type = NET_NAME_ENUM; 2993 name_assign_type = NET_NAME_ENUM;
2782 } 2994 }
2783 2995
2784 dest_net = rtnl_link_get_net(net, tb); 2996 dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
2785 if (IS_ERR(dest_net)) 2997 if (IS_ERR(dest_net))
2786 return PTR_ERR(dest_net); 2998 return PTR_ERR(dest_net);
2787 2999
2788 err = -EPERM;
2789 if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN))
2790 goto out;
2791
2792 if (tb[IFLA_LINK_NETNSID]) { 3000 if (tb[IFLA_LINK_NETNSID]) {
2793 int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); 3001 int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
2794 3002
@@ -2881,6 +3089,10 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2881 if (err < 0) 3089 if (err < 0)
2882 return err; 3090 return err;
2883 3091
3092 err = rtnl_ensure_unique_netns(tb, extack, true);
3093 if (err < 0)
3094 return err;
3095
2884 if (tb[IFLA_IF_NETNSID]) { 3096 if (tb[IFLA_IF_NETNSID]) {
2885 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 3097 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
2886 tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); 3098 tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
@@ -2915,7 +3127,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2915 err = rtnl_fill_ifinfo(nskb, dev, net, 3127 err = rtnl_fill_ifinfo(nskb, dev, net,
2916 RTM_NEWLINK, NETLINK_CB(skb).portid, 3128 RTM_NEWLINK, NETLINK_CB(skb).portid,
2917 nlh->nlmsg_seq, 0, 0, ext_filter_mask, 3129 nlh->nlmsg_seq, 0, 0, ext_filter_mask,
2918 0, NULL, netnsid); 3130 0, NULL, 0, netnsid);
2919 if (err < 0) { 3131 if (err < 0) {
2920 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 3132 /* -EMSGSIZE implies BUG in if_nlmsg_size */
2921 WARN_ON(err == -EMSGSIZE); 3133 WARN_ON(err == -EMSGSIZE);
@@ -2973,18 +3185,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
2973 s_idx = 1; 3185 s_idx = 1;
2974 3186
2975 for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { 3187 for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
3188 struct rtnl_link **tab;
2976 int type = cb->nlh->nlmsg_type-RTM_BASE; 3189 int type = cb->nlh->nlmsg_type-RTM_BASE;
2977 struct rtnl_link *handlers; 3190 struct rtnl_link *link;
2978 rtnl_dumpit_func dumpit; 3191 rtnl_dumpit_func dumpit;
2979 3192
2980 if (idx < s_idx || idx == PF_PACKET) 3193 if (idx < s_idx || idx == PF_PACKET)
2981 continue; 3194 continue;
2982 3195
2983 handlers = rtnl_dereference(rtnl_msg_handlers[idx]); 3196 if (type < 0 || type >= RTM_NR_MSGTYPES)
2984 if (!handlers) 3197 continue;
3198
3199 tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
3200 if (!tab)
3201 continue;
3202
3203 link = tab[type];
3204 if (!link)
2985 continue; 3205 continue;
2986 3206
2987 dumpit = READ_ONCE(handlers[type].dumpit); 3207 dumpit = link->dumpit;
2988 if (!dumpit) 3208 if (!dumpit)
2989 continue; 3209 continue;
2990 3210
@@ -3003,7 +3223,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
3003 3223
3004struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, 3224struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
3005 unsigned int change, 3225 unsigned int change,
3006 u32 event, gfp_t flags, int *new_nsid) 3226 u32 event, gfp_t flags, int *new_nsid,
3227 int new_ifindex)
3007{ 3228{
3008 struct net *net = dev_net(dev); 3229 struct net *net = dev_net(dev);
3009 struct sk_buff *skb; 3230 struct sk_buff *skb;
@@ -3016,7 +3237,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
3016 3237
3017 err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), 3238 err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
3018 type, 0, 0, change, 0, 0, event, 3239 type, 0, 0, change, 0, 0, event,
3019 new_nsid, -1); 3240 new_nsid, new_ifindex, -1);
3020 if (err < 0) { 3241 if (err < 0) {
3021 /* -EMSGSIZE implies BUG in if_nlmsg_size() */ 3242 /* -EMSGSIZE implies BUG in if_nlmsg_size() */
3022 WARN_ON(err == -EMSGSIZE); 3243 WARN_ON(err == -EMSGSIZE);
@@ -3039,14 +3260,15 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
3039 3260
3040static void rtmsg_ifinfo_event(int type, struct net_device *dev, 3261static void rtmsg_ifinfo_event(int type, struct net_device *dev,
3041 unsigned int change, u32 event, 3262 unsigned int change, u32 event,
3042 gfp_t flags, int *new_nsid) 3263 gfp_t flags, int *new_nsid, int new_ifindex)
3043{ 3264{
3044 struct sk_buff *skb; 3265 struct sk_buff *skb;
3045 3266
3046 if (dev->reg_state != NETREG_REGISTERED) 3267 if (dev->reg_state != NETREG_REGISTERED)
3047 return; 3268 return;
3048 3269
3049 skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid); 3270 skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid,
3271 new_ifindex);
3050 if (skb) 3272 if (skb)
3051 rtmsg_ifinfo_send(skb, dev, flags); 3273 rtmsg_ifinfo_send(skb, dev, flags);
3052} 3274}
@@ -3054,14 +3276,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
3054void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, 3276void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
3055 gfp_t flags) 3277 gfp_t flags)
3056{ 3278{
3057 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); 3279 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
3280 NULL, 0);
3058} 3281}
3059 3282
3060void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, 3283void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
3061 gfp_t flags, int *new_nsid) 3284 gfp_t flags, int *new_nsid, int new_ifindex)
3062{ 3285{
3063 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, 3286 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
3064 new_nsid); 3287 new_nsid, new_ifindex);
3065} 3288}
3066 3289
3067static int nlmsg_populate_fdb_fill(struct sk_buff *skb, 3290static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
@@ -4314,7 +4537,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
4314 struct netlink_ext_ack *extack) 4537 struct netlink_ext_ack *extack)
4315{ 4538{
4316 struct net *net = sock_net(skb->sk); 4539 struct net *net = sock_net(skb->sk);
4317 struct rtnl_link *handlers; 4540 struct rtnl_link *link;
4541 struct module *owner;
4318 int err = -EOPNOTSUPP; 4542 int err = -EOPNOTSUPP;
4319 rtnl_doit_func doit; 4543 rtnl_doit_func doit;
4320 unsigned int flags; 4544 unsigned int flags;
@@ -4338,79 +4562,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
4338 if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) 4562 if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
4339 return -EPERM; 4563 return -EPERM;
4340 4564
4341 if (family >= ARRAY_SIZE(rtnl_msg_handlers))
4342 family = PF_UNSPEC;
4343
4344 rcu_read_lock(); 4565 rcu_read_lock();
4345 handlers = rcu_dereference(rtnl_msg_handlers[family]);
4346 if (!handlers) {
4347 family = PF_UNSPEC;
4348 handlers = rcu_dereference(rtnl_msg_handlers[family]);
4349 }
4350
4351 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { 4566 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
4352 struct sock *rtnl; 4567 struct sock *rtnl;
4353 rtnl_dumpit_func dumpit; 4568 rtnl_dumpit_func dumpit;
4354 u16 min_dump_alloc = 0; 4569 u16 min_dump_alloc = 0;
4355 4570
4356 dumpit = READ_ONCE(handlers[type].dumpit); 4571 link = rtnl_get_link(family, type);
4357 if (!dumpit) { 4572 if (!link || !link->dumpit) {
4358 family = PF_UNSPEC; 4573 family = PF_UNSPEC;
4359 handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]); 4574 link = rtnl_get_link(family, type);
4360 if (!handlers) 4575 if (!link || !link->dumpit)
4361 goto err_unlock;
4362
4363 dumpit = READ_ONCE(handlers[type].dumpit);
4364 if (!dumpit)
4365 goto err_unlock; 4576 goto err_unlock;
4366 } 4577 }
4367 4578 owner = link->owner;
4368 refcount_inc(&rtnl_msg_handlers_ref[family]); 4579 dumpit = link->dumpit;
4369 4580
4370 if (type == RTM_GETLINK - RTM_BASE) 4581 if (type == RTM_GETLINK - RTM_BASE)
4371 min_dump_alloc = rtnl_calcit(skb, nlh); 4582 min_dump_alloc = rtnl_calcit(skb, nlh);
4372 4583
4584 err = 0;
4585 /* need to do this before rcu_read_unlock() */
4586 if (!try_module_get(owner))
4587 err = -EPROTONOSUPPORT;
4588
4373 rcu_read_unlock(); 4589 rcu_read_unlock();
4374 4590
4375 rtnl = net->rtnl; 4591 rtnl = net->rtnl;
4376 { 4592 if (err == 0) {
4377 struct netlink_dump_control c = { 4593 struct netlink_dump_control c = {
4378 .dump = dumpit, 4594 .dump = dumpit,
4379 .min_dump_alloc = min_dump_alloc, 4595 .min_dump_alloc = min_dump_alloc,
4596 .module = owner,
4380 }; 4597 };
4381 err = netlink_dump_start(rtnl, skb, nlh, &c); 4598 err = netlink_dump_start(rtnl, skb, nlh, &c);
4599 /* netlink_dump_start() will keep a reference on
4600 * the module while the dump is in progress.
4601 */
4602 module_put(owner);
4382 } 4603 }
4383 refcount_dec(&rtnl_msg_handlers_ref[family]);
4384 return err; 4604 return err;
4385 } 4605 }
4386 4606
4387 doit = READ_ONCE(handlers[type].doit); 4607 link = rtnl_get_link(family, type);
4388 if (!doit) { 4608 if (!link || !link->doit) {
4389 family = PF_UNSPEC; 4609 family = PF_UNSPEC;
4390 handlers = rcu_dereference(rtnl_msg_handlers[family]); 4610 link = rtnl_get_link(PF_UNSPEC, type);
4611 if (!link || !link->doit)
4612 goto out_unlock;
4391 } 4613 }
4392 4614
4393 flags = READ_ONCE(handlers[type].flags); 4615 owner = link->owner;
4616 if (!try_module_get(owner)) {
4617 err = -EPROTONOSUPPORT;
4618 goto out_unlock;
4619 }
4620
4621 flags = link->flags;
4394 if (flags & RTNL_FLAG_DOIT_UNLOCKED) { 4622 if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
4395 refcount_inc(&rtnl_msg_handlers_ref[family]); 4623 doit = link->doit;
4396 doit = READ_ONCE(handlers[type].doit);
4397 rcu_read_unlock(); 4624 rcu_read_unlock();
4398 if (doit) 4625 if (doit)
4399 err = doit(skb, nlh, extack); 4626 err = doit(skb, nlh, extack);
4400 refcount_dec(&rtnl_msg_handlers_ref[family]); 4627 module_put(owner);
4401 return err; 4628 return err;
4402 } 4629 }
4403
4404 rcu_read_unlock(); 4630 rcu_read_unlock();
4405 4631
4406 rtnl_lock(); 4632 rtnl_lock();
4407 handlers = rtnl_dereference(rtnl_msg_handlers[family]); 4633 link = rtnl_get_link(family, type);
4408 if (handlers) { 4634 if (link && link->doit)
4409 doit = READ_ONCE(handlers[type].doit); 4635 err = link->doit(skb, nlh, extack);
4410 if (doit)
4411 err = doit(skb, nlh, extack);
4412 }
4413 rtnl_unlock(); 4636 rtnl_unlock();
4637
4638 module_put(owner);
4639
4640 return err;
4641
4642out_unlock:
4643 rcu_read_unlock();
4414 return err; 4644 return err;
4415 4645
4416err_unlock: 4646err_unlock:
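The try_module_get()/module_put() pairing above follows a standard pattern: the table entry is only guaranteed alive inside the RCU read-side section, so the owning module must be pinned and the function pointer copied out before rcu_read_unlock(), after which sleeping is allowed. Condensed, with the error labels elided (try_module_get(NULL) succeeds, so built-in handlers need no special case):

rcu_read_lock();
link = rtnl_get_link(family, type);
if (!link || !try_module_get(link->owner)) {
	rcu_read_unlock();
	return -EPROTONOSUPPORT;
}
owner = link->owner;
doit = link->doit;	/* copy out before leaving the RCU section */
rcu_read_unlock();

err = doit(skb, nlh, extack);	/* may sleep; the module is pinned */
module_put(owner);
return err;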
@@ -4454,7 +4684,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
4454 case NETDEV_CHANGELOWERSTATE: 4684 case NETDEV_CHANGELOWERSTATE:
4455 case NETDEV_CHANGE_TX_QUEUE_LEN: 4685 case NETDEV_CHANGE_TX_QUEUE_LEN:
4456 rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), 4686 rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
4457 GFP_KERNEL, NULL); 4687 GFP_KERNEL, NULL, 0);
4458 break; 4688 break;
4459 default: 4689 default:
4460 break; 4690 break;
@@ -4498,11 +4728,6 @@ static struct pernet_operations rtnetlink_net_ops = {
4498 4728
4499void __init rtnetlink_init(void) 4729void __init rtnetlink_init(void)
4500{ 4730{
4501 int i;
4502
4503 for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
4504 refcount_set(&rtnl_msg_handlers_ref[i], 1);
4505
4506 if (register_pernet_subsys(&rtnetlink_net_ops)) 4731 if (register_pernet_subsys(&rtnetlink_net_ops))
4507 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 4732 panic("rtnetlink_init: cannot initialize rtnetlink\n");
4508 4733
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 08f574081315..09bd89c90a71 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3656,6 +3656,10 @@ normal:
3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & 3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
3657 SKBTX_SHARED_FRAG; 3657 SKBTX_SHARED_FRAG;
3658 3658
3659 if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
3660 skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3661 goto err;
3662
3659 while (pos < offset + len) { 3663 while (pos < offset + len) {
3660 if (i >= nfrags) { 3664 if (i >= nfrags) {
3661 BUG_ON(skb_headlen(list_skb)); 3665 BUG_ON(skb_headlen(list_skb));
@@ -3667,6 +3671,11 @@ normal:
3667 3671
3668 BUG_ON(!nfrags); 3672 BUG_ON(!nfrags);
3669 3673
3674 if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
3675 skb_zerocopy_clone(nskb, frag_skb,
3676 GFP_ATOMIC))
3677 goto err;
3678
3670 list_skb = list_skb->next; 3679 list_skb = list_skb->next;
3671 } 3680 }
3672 3681
@@ -3678,11 +3687,6 @@ normal:
3678 goto err; 3687 goto err;
3679 } 3688 }
3680 3689
3681 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3682 goto err;
3683 if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3684 goto err;
3685
3686 *nskb_frag = *frag; 3690 *nskb_frag = *frag;
3687 __skb_frag_ref(nskb_frag); 3691 __skb_frag_ref(nskb_frag);
3688 size = skb_frag_size(nskb_frag); 3692 size = skb_frag_size(nskb_frag);
@@ -3890,10 +3894,12 @@ EXPORT_SYMBOL_GPL(skb_gro_receive);
3890 3894
3891void __init skb_init(void) 3895void __init skb_init(void)
3892{ 3896{
3893 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 3897 skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
3894 sizeof(struct sk_buff), 3898 sizeof(struct sk_buff),
3895 0, 3899 0,
3896 SLAB_HWCACHE_ALIGN|SLAB_PANIC, 3900 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
3901 offsetof(struct sk_buff, cb),
3902 sizeof_field(struct sk_buff, cb),
3897 NULL); 3903 NULL);
3898 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", 3904 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
3899 sizeof(struct sk_buff_fclones), 3905 sizeof(struct sk_buff_fclones),
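kmem_cache_create_usercopy() extends kmem_cache_create() with a usercopy whitelist: hardened usercopy will only let the [useroffset, useroffset + usersize) window of each object cross the user/kernel boundary, which for sk_buff is exactly the cb[] scratch area. A hedged sketch of the same API on an illustrative structure:

struct foo {
	u32 flags;	/* kernel-private; never copied to user space */
	char buf[48];	/* the only region whitelisted for usercopy */
};

static struct kmem_cache *foo_cache;

static int __init foo_cache_init(void)
{
	foo_cache = kmem_cache_create_usercopy("foo_cache",
					       sizeof(struct foo), 0,
					       SLAB_HWCACHE_ALIGN,
					       offsetof(struct foo, buf),
					       sizeof_field(struct foo, buf),
					       NULL);
	return foo_cache ? 0 : -ENOMEM;
}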
@@ -4910,37 +4916,74 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
4910EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); 4916EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
4911 4917
4912/** 4918/**
4913 * skb_gso_validate_mtu - Return in case such skb fits a given MTU 4919 * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
4914 * 4920 *
4915 * @skb: GSO skb 4921 * There are a couple of instances where we have a GSO skb, and we
4916 * @mtu: MTU to validate against 4922 * want to determine what size it would be after it is segmented.
4917 * 4923 *
4918 * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU 4924 * We might want to check:
4919 * once split. 4925 * - L3+L4+payload size (e.g. IP forwarding)
4926 * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
4927 *
4928 * This is a helper to do that correctly considering GSO_BY_FRAGS.
4929 *
4930 * @seg_len: The segmented length (from skb_gso_*_seglen). In the
4931 * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
4932 *
4933 * @max_len: The maximum permissible length.
4934 *
4935 * Returns true if the segmented length <= max length.
4920 */ 4936 */
4921bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu) 4937static inline bool skb_gso_size_check(const struct sk_buff *skb,
4922{ 4938 unsigned int seg_len,
4939 unsigned int max_len) {
4923 const struct skb_shared_info *shinfo = skb_shinfo(skb); 4940 const struct skb_shared_info *shinfo = skb_shinfo(skb);
4924 const struct sk_buff *iter; 4941 const struct sk_buff *iter;
4925 unsigned int hlen;
4926
4927 hlen = skb_gso_network_seglen(skb);
4928 4942
4929 if (shinfo->gso_size != GSO_BY_FRAGS) 4943 if (shinfo->gso_size != GSO_BY_FRAGS)
4930 return hlen <= mtu; 4944 return seg_len <= max_len;
4931 4945
4932 /* Undo this so we can re-use header sizes */ 4946 /* Undo this so we can re-use header sizes */
4933 hlen -= GSO_BY_FRAGS; 4947 seg_len -= GSO_BY_FRAGS;
4934 4948
4935 skb_walk_frags(skb, iter) { 4949 skb_walk_frags(skb, iter) {
4936 if (hlen + skb_headlen(iter) > mtu) 4950 if (seg_len + skb_headlen(iter) > max_len)
4937 return false; 4951 return false;
4938 } 4952 }
4939 4953
4940 return true; 4954 return true;
4941} 4955}
4956
4957/**
4958 * skb_gso_validate_mtu - Will a split GSO skb fit a given MTU?
4959 *
4960 * @skb: GSO skb
4961 * @mtu: MTU to validate against
4962 *
4963 * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
4964 * once split.
4965 */
4966bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
4967{
4968 return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
4969}
4942EXPORT_SYMBOL_GPL(skb_gso_validate_mtu); 4970EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
4943 4971
4972/**
4973 * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
4974 *
4975 * @skb: GSO skb
4976 * @len: length to validate against
4977 *
4978 * skb_gso_validate_mac_len validates if a given skb will fit a wanted
4979 * length once split, including L2, L3 and L4 headers and the payload.
4980 */
4981bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
4982{
4983 return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
4984}
4985EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
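The two exported wrappers differ only in which seglen they feed to skb_gso_size_check(): skb_gso_network_seglen() covers L3+L4+payload for MTU checks, while skb_gso_mac_seglen() adds the L2 header for frame-length checks. A hedged usage sketch; the function and parameter names are illustrative:

/* Validate a GSO skb against both an L3 MTU and a hardware frame-length
 * limit before handing it to a driver. */
static bool gso_fits(const struct sk_buff *skb, unsigned int mtu,
		     unsigned int max_frame_len)
{
	return skb_gso_validate_mtu(skb, mtu) &&		/* L3+L4+payload */
	       skb_gso_validate_mac_len(skb, max_frame_len);	/* +L2 header */
}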
4986
4944static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) 4987static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
4945{ 4988{
4946 if (skb_cow(skb, skb_headroom(skb)) < 0) { 4989 if (skb_cow(skb, skb_headroom(skb)) < 0) {
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..c501499a04fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,8 @@
145static DEFINE_MUTEX(proto_list_mutex); 145static DEFINE_MUTEX(proto_list_mutex);
146static LIST_HEAD(proto_list); 146static LIST_HEAD(proto_list);
147 147
148static void sock_inuse_add(struct net *net, int val);
149
148/** 150/**
149 * sk_ns_capable - General socket capability test 151 * sk_ns_capable - General socket capability test
150 * @sk: Socket to use a capability on or through 152 * @sk: Socket to use a capability on or through
@@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1531 sk->sk_kern_sock = kern; 1533 sk->sk_kern_sock = kern;
1532 sock_lock_init(sk); 1534 sock_lock_init(sk);
1533 sk->sk_net_refcnt = kern ? 0 : 1; 1535 sk->sk_net_refcnt = kern ? 0 : 1;
1534 if (likely(sk->sk_net_refcnt)) 1536 if (likely(sk->sk_net_refcnt)) {
1535 get_net(net); 1537 get_net(net);
1538 sock_inuse_add(net, 1);
1539 }
1540
1536 sock_net_set(sk, net); 1541 sock_net_set(sk, net);
1537 refcount_set(&sk->sk_wmem_alloc, 1); 1542 refcount_set(&sk->sk_wmem_alloc, 1);
1538 1543
@@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk)
1595 1600
1596static void __sk_free(struct sock *sk) 1601static void __sk_free(struct sock *sk)
1597{ 1602{
1603 if (likely(sk->sk_net_refcnt))
1604 sock_inuse_add(sock_net(sk), -1);
1605
1598 if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) 1606 if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
1599 sock_diag_broadcast_destroy(sk); 1607 sock_diag_broadcast_destroy(sk);
1600 else 1608 else
@@ -1675,16 +1683,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1675 newsk->sk_dst_pending_confirm = 0; 1683 newsk->sk_dst_pending_confirm = 0;
1676 newsk->sk_wmem_queued = 0; 1684 newsk->sk_wmem_queued = 0;
1677 newsk->sk_forward_alloc = 0; 1685 newsk->sk_forward_alloc = 0;
1678
1679 /* sk->sk_memcg will be populated at accept() time */
1680 newsk->sk_memcg = NULL;
1681
1682 atomic_set(&newsk->sk_drops, 0); 1686 atomic_set(&newsk->sk_drops, 0);
1683 newsk->sk_send_head = NULL; 1687 newsk->sk_send_head = NULL;
1684 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; 1688 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1685 atomic_set(&newsk->sk_zckey, 0); 1689 atomic_set(&newsk->sk_zckey, 0);
1686 1690
1687 sock_reset_flag(newsk, SOCK_DONE); 1691 sock_reset_flag(newsk, SOCK_DONE);
1692 mem_cgroup_sk_alloc(newsk);
1688 cgroup_sk_alloc(&newsk->sk_cgrp_data); 1693 cgroup_sk_alloc(&newsk->sk_cgrp_data);
1689 1694
1690 rcu_read_lock(); 1695 rcu_read_lock();
@@ -1716,6 +1721,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1716 newsk->sk_priority = 0; 1721 newsk->sk_priority = 0;
1717 newsk->sk_incoming_cpu = raw_smp_processor_id(); 1722 newsk->sk_incoming_cpu = raw_smp_processor_id();
1718 atomic64_set(&newsk->sk_cookie, 0); 1723 atomic64_set(&newsk->sk_cookie, 0);
1724 if (likely(newsk->sk_net_refcnt))
1725 sock_inuse_add(sock_net(newsk), 1);
1719 1726
1720 /* 1727 /*
1721 * Before updating sk_refcnt, we must commit prior changes to memory 1728 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -2496,7 +2503,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2496} 2503}
2497EXPORT_SYMBOL(sock_no_getname); 2504EXPORT_SYMBOL(sock_no_getname);
2498 2505
2499unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) 2506__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2500{ 2507{
2501 return 0; 2508 return 0;
2502} 2509}
@@ -2612,7 +2619,7 @@ static void sock_def_error_report(struct sock *sk)
2612 rcu_read_lock(); 2619 rcu_read_lock();
2613 wq = rcu_dereference(sk->sk_wq); 2620 wq = rcu_dereference(sk->sk_wq);
2614 if (skwq_has_sleeper(wq)) 2621 if (skwq_has_sleeper(wq))
2615 wake_up_interruptible_poll(&wq->wait, POLLERR); 2622 wake_up_interruptible_poll(&wq->wait, EPOLLERR);
2616 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 2623 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2617 rcu_read_unlock(); 2624 rcu_read_unlock();
2618} 2625}
@@ -2624,8 +2631,8 @@ static void sock_def_readable(struct sock *sk)
2624 rcu_read_lock(); 2631 rcu_read_lock();
2625 wq = rcu_dereference(sk->sk_wq); 2632 wq = rcu_dereference(sk->sk_wq);
2626 if (skwq_has_sleeper(wq)) 2633 if (skwq_has_sleeper(wq))
2627 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | 2634 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
2628 POLLRDNORM | POLLRDBAND); 2635 EPOLLRDNORM | EPOLLRDBAND);
2629 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 2636 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2630 rcu_read_unlock(); 2637 rcu_read_unlock();
2631} 2638}
@@ -2642,8 +2649,8 @@ static void sock_def_write_space(struct sock *sk)
2642 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 2649 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2643 wq = rcu_dereference(sk->sk_wq); 2650 wq = rcu_dereference(sk->sk_wq);
2644 if (skwq_has_sleeper(wq)) 2651 if (skwq_has_sleeper(wq))
2645 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | 2652 wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
2646 POLLWRNORM | POLLWRBAND); 2653 EPOLLWRNORM | EPOLLWRBAND);
2647 2654
2648 /* Should agree with poll, otherwise some programs break */ 2655 /* Should agree with poll, otherwise some programs break */
2649 if (sock_writeable(sk)) 2656 if (sock_writeable(sk))
@@ -3045,7 +3052,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3045 3052
3046void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 3053void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3047{ 3054{
3048 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); 3055 __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
3049} 3056}
3050EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 3057EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3051 3058
@@ -3055,21 +3062,50 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
3055 int res = 0; 3062 int res = 0;
3056 3063
3057 for_each_possible_cpu(cpu) 3064 for_each_possible_cpu(cpu)
3058 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; 3065 res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3059 3066
3060 return res >= 0 ? res : 0; 3067 return res >= 0 ? res : 0;
3061} 3068}
3062EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 3069EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3063 3070
3071static void sock_inuse_add(struct net *net, int val)
3072{
3073 this_cpu_add(*net->core.sock_inuse, val);
3074}
3075
3076int sock_inuse_get(struct net *net)
3077{
3078 int cpu, res = 0;
3079
3080 for_each_possible_cpu(cpu)
3081 res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3082
3083 return res;
3084}
3085
3086EXPORT_SYMBOL_GPL(sock_inuse_get);
3087
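sock_inuse_add() and sock_inuse_get() are the standard per-CPU counter pattern: writers bump only their local slot with no locks or atomics, and readers sum every possible CPU, accepting a momentarily stale total. The same pattern in isolation (the counter name is illustrative; assume counter = alloc_percpu(int) at init):

static int __percpu *counter;

static void counter_add(int val)
{
	this_cpu_add(*counter, val);	/* lock-free, CPU-local update */
}

static int counter_read(void)
{
	int cpu, sum = 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(counter, cpu);
	return sum;	/* may lag concurrent writers; that is acceptable */
}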
3064static int __net_init sock_inuse_init_net(struct net *net) 3088static int __net_init sock_inuse_init_net(struct net *net)
3065{ 3089{
3066 net->core.inuse = alloc_percpu(struct prot_inuse); 3090 net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3067 return net->core.inuse ? 0 : -ENOMEM; 3091 if (net->core.prot_inuse == NULL)
3092 return -ENOMEM;
3093
3094 net->core.sock_inuse = alloc_percpu(int);
3095 if (net->core.sock_inuse == NULL)
3096 goto out;
3097
3098 return 0;
3099
3100out:
3101 free_percpu(net->core.prot_inuse);
3102 return -ENOMEM;
3068} 3103}
3069 3104
3070static void __net_exit sock_inuse_exit_net(struct net *net) 3105static void __net_exit sock_inuse_exit_net(struct net *net)
3071{ 3106{
3072 free_percpu(net->core.inuse); 3107 free_percpu(net->core.prot_inuse);
3108 free_percpu(net->core.sock_inuse);
3073} 3109}
3074 3110
3075static struct pernet_operations net_inuse_ops = { 3111static struct pernet_operations net_inuse_ops = {
@@ -3112,6 +3148,10 @@ static inline void assign_proto_idx(struct proto *prot)
3112static inline void release_proto_idx(struct proto *prot) 3148static inline void release_proto_idx(struct proto *prot)
3113{ 3149{
3114} 3150}
3151
3152static void sock_inuse_add(struct net *net, int val)
3153{
3154}
3115#endif 3155#endif
3116 3156
3117static void req_prot_cleanup(struct request_sock_ops *rsk_prot) 3157static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
@@ -3151,8 +3191,10 @@ static int req_prot_init(const struct proto *prot)
3151int proto_register(struct proto *prot, int alloc_slab) 3191int proto_register(struct proto *prot, int alloc_slab)
3152{ 3192{
3153 if (alloc_slab) { 3193 if (alloc_slab) {
3154 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 3194 prot->slab = kmem_cache_create_usercopy(prot->name,
3195 prot->obj_size, 0,
3155 SLAB_HWCACHE_ALIGN | prot->slab_flags, 3196 SLAB_HWCACHE_ALIGN | prot->slab_flags,
3197 prot->useroffset, prot->usersize,
3156 NULL); 3198 NULL);
3157 3199
3158 if (prot->slab == NULL) { 3200 if (prot->slab == NULL) {
@@ -3319,7 +3361,6 @@ static int proto_seq_open(struct inode *inode, struct file *file)
3319} 3361}
3320 3362
3321static const struct file_operations proto_seq_fops = { 3363static const struct file_operations proto_seq_fops = {
3322 .owner = THIS_MODULE,
3323 .open = proto_seq_open, 3364 .open = proto_seq_open,
3324 .read = seq_read, 3365 .read = seq_read,
3325 .llseek = seq_lseek, 3366 .llseek = seq_lseek,
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5eeb1d20cc38..064acb04be0f 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -94,6 +94,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
94 return more_reuse; 94 return more_reuse;
95} 95}
96 96
97static void reuseport_free_rcu(struct rcu_head *head)
98{
99 struct sock_reuseport *reuse;
100
101 reuse = container_of(head, struct sock_reuseport, rcu);
102 if (reuse->prog)
103 bpf_prog_destroy(reuse->prog);
104 kfree(reuse);
105}
106
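reuseport_free_rcu() moves above reuseport_add_sock() so the latter can retire a socket's old group: the rcu_head embedded in struct sock_reuseport lets call_rcu() defer the free past all in-flight RCU readers, and container_of() recovers the enclosing object inside the callback. The pattern in isolation (struct blob is illustrative):

struct blob {
	void *payload;
	struct rcu_head rcu;
};

static void blob_free_rcu(struct rcu_head *head)
{
	struct blob *b = container_of(head, struct blob, rcu);

	kfree(b->payload);
	kfree(b);
}

static void blob_retire(struct blob *b)
{
	/* safe even while readers still hold references obtained under RCU */
	call_rcu(&b->rcu, blob_free_rcu);
}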
97/** 107/**
98 * reuseport_add_sock - Add a socket to the reuseport group of another. 108 * reuseport_add_sock - Add a socket to the reuseport group of another.
99 * @sk: New socket to add to the group. 109 * @sk: New socket to add to the group.
@@ -102,7 +112,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
102 */ 112 */
103int reuseport_add_sock(struct sock *sk, struct sock *sk2) 113int reuseport_add_sock(struct sock *sk, struct sock *sk2)
104{ 114{
105 struct sock_reuseport *reuse; 115 struct sock_reuseport *old_reuse, *reuse;
106 116
107 if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { 117 if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
108 int err = reuseport_alloc(sk2); 118 int err = reuseport_alloc(sk2);
@@ -113,10 +123,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
113 123
114 spin_lock_bh(&reuseport_lock); 124 spin_lock_bh(&reuseport_lock);
115 reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, 125 reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
116 lockdep_is_held(&reuseport_lock)), 126 lockdep_is_held(&reuseport_lock));
117 WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, 127 old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
118 lockdep_is_held(&reuseport_lock)), 128 lockdep_is_held(&reuseport_lock));
119 "socket already in reuseport group"); 129 if (old_reuse && old_reuse->num_socks != 1) {
130 spin_unlock_bh(&reuseport_lock);
131 return -EBUSY;
132 }
120 133
121 if (reuse->num_socks == reuse->max_socks) { 134 if (reuse->num_socks == reuse->max_socks) {
122 reuse = reuseport_grow(reuse); 135 reuse = reuseport_grow(reuse);
@@ -134,19 +147,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
134 147
135 spin_unlock_bh(&reuseport_lock); 148 spin_unlock_bh(&reuseport_lock);
136 149
150 if (old_reuse)
151 call_rcu(&old_reuse->rcu, reuseport_free_rcu);
137 return 0; 152 return 0;
138} 153}
139 154
140static void reuseport_free_rcu(struct rcu_head *head)
141{
142 struct sock_reuseport *reuse;
143
144 reuse = container_of(head, struct sock_reuseport, rcu);
145 if (reuse->prog)
146 bpf_prog_destroy(reuse->prog);
147 kfree(reuse);
148}
149
150void reuseport_detach_sock(struct sock *sk) 155void reuseport_detach_sock(struct sock *sk)
151{ 156{
152 struct sock_reuseport *reuse; 157 struct sock_reuseport *reuse;
@@ -235,7 +240,9 @@ struct sock *reuseport_select_sock(struct sock *sk,
235 240
236 if (prog && skb) 241 if (prog && skb)
237 sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); 242 sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
238 else 243
244 /* no bpf or invalid bpf result: fall back to hash usage */
245 if (!sk2)
239 sk2 = reuse->socks[reciprocal_scale(hash, socks)]; 246 sk2 = reuse->socks[reciprocal_scale(hash, socks)];
240 } 247 }
241 248
diff --git a/net/core/stream.c b/net/core/stream.c
index 1cff9c6270c6..7d329fb1f553 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -38,8 +38,8 @@ void sk_stream_write_space(struct sock *sk)
38 rcu_read_lock(); 38 rcu_read_lock();
39 wq = rcu_dereference(sk->sk_wq); 39 wq = rcu_dereference(sk->sk_wq);
40 if (skwq_has_sleeper(wq)) 40 if (skwq_has_sleeper(wq))
41 wake_up_interruptible_poll(&wq->wait, POLLOUT | 41 wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
42 POLLWRNORM | POLLWRBAND); 42 EPOLLWRNORM | EPOLLWRBAND);
43 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 43 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
44 sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); 44 sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
45 rcu_read_unlock(); 45 rcu_read_unlock();
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a47ad6cd41c0..f2d0462611c3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,7 @@
25 25
26static int zero = 0; 26static int zero = 0;
27static int one = 1; 27static int one = 1;
28static int two __maybe_unused = 2;
28static int min_sndbuf = SOCK_MIN_SNDBUF; 29static int min_sndbuf = SOCK_MIN_SNDBUF;
29static int min_rcvbuf = SOCK_MIN_RCVBUF; 30static int min_rcvbuf = SOCK_MIN_RCVBUF;
30static int max_skb_frags = MAX_SKB_FRAGS; 31static int max_skb_frags = MAX_SKB_FRAGS;
@@ -250,6 +251,46 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
250 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 251 return proc_dostring(&fake_table, write, buffer, lenp, ppos);
251} 252}
252 253
254#ifdef CONFIG_BPF_JIT
255static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
256 void __user *buffer, size_t *lenp,
257 loff_t *ppos)
258{
259 int ret, jit_enable = *(int *)table->data;
260 struct ctl_table tmp = *table;
261
262 if (write && !capable(CAP_SYS_ADMIN))
263 return -EPERM;
264
265 tmp.data = &jit_enable;
266 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
267 if (write && !ret) {
268 if (jit_enable < 2 ||
269 (jit_enable == 2 && bpf_dump_raw_ok())) {
270 *(int *)table->data = jit_enable;
271 if (jit_enable == 2)
272 pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
273 } else {
274 ret = -EPERM;
275 }
276 }
277 return ret;
278}
279
280# ifdef CONFIG_HAVE_EBPF_JIT
281static int
282proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
283 void __user *buffer, size_t *lenp,
284 loff_t *ppos)
285{
286 if (!capable(CAP_SYS_ADMIN))
287 return -EPERM;
288
289 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
290}
291# endif
292#endif
293
253static struct ctl_table net_core_table[] = { 294static struct ctl_table net_core_table[] = {
254#ifdef CONFIG_NET 295#ifdef CONFIG_NET
255 { 296 {
@@ -325,13 +366,14 @@ static struct ctl_table net_core_table[] = {
325 .data = &bpf_jit_enable, 366 .data = &bpf_jit_enable,
326 .maxlen = sizeof(int), 367 .maxlen = sizeof(int),
327 .mode = 0644, 368 .mode = 0644,
328#ifndef CONFIG_BPF_JIT_ALWAYS_ON 369 .proc_handler = proc_dointvec_minmax_bpf_enable,
329 .proc_handler = proc_dointvec 370# ifdef CONFIG_BPF_JIT_ALWAYS_ON
330#else
331 .proc_handler = proc_dointvec_minmax,
332 .extra1 = &one, 371 .extra1 = &one,
333 .extra2 = &one, 372 .extra2 = &one,
334#endif 373# else
374 .extra1 = &zero,
375 .extra2 = &two,
376# endif
335 }, 377 },
336# ifdef CONFIG_HAVE_EBPF_JIT 378# ifdef CONFIG_HAVE_EBPF_JIT
337 { 379 {
@@ -339,14 +381,18 @@ static struct ctl_table net_core_table[] = {
339 .data = &bpf_jit_harden, 381 .data = &bpf_jit_harden,
340 .maxlen = sizeof(int), 382 .maxlen = sizeof(int),
341 .mode = 0600, 383 .mode = 0600,
342 .proc_handler = proc_dointvec, 384 .proc_handler = proc_dointvec_minmax_bpf_restricted,
385 .extra1 = &zero,
386 .extra2 = &two,
343 }, 387 },
344 { 388 {
345 .procname = "bpf_jit_kallsyms", 389 .procname = "bpf_jit_kallsyms",
346 .data = &bpf_jit_kallsyms, 390 .data = &bpf_jit_kallsyms,
347 .maxlen = sizeof(int), 391 .maxlen = sizeof(int),
348 .mode = 0600, 392 .mode = 0600,
349 .proc_handler = proc_dointvec, 393 .proc_handler = proc_dointvec_minmax_bpf_restricted,
394 .extra1 = &zero,
395 .extra2 = &one,
350 }, 396 },
351# endif 397# endif
352#endif 398#endif
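
The sysctl hunk swaps the plain proc_dointvec handlers for range-checked, capability-gated ones: writes to the bpf_jit_* knobs now require CAP_SYS_ADMIN and are clamped through extra1/extra2. A sketch of the same gating idiom for a hypothetical knob, mirroring the write-only check of proc_dointvec_minmax_bpf_enable (all names below are illustrative, not from the patch):

    #include <linux/sysctl.h>
    #include <linux/capability.h>

    static int zero;
    static int one = 1;
    static int example_knob;

    /* Refuse writes from unprivileged contexts, then defer to the stock
     * min/max handler so extra1/extra2 still clamp the value. */
    static int proc_dointvec_minmax_privileged(struct ctl_table *table, int write,
                                               void __user *buffer, size_t *lenp,
                                               loff_t *ppos)
    {
            if (write && !capable(CAP_SYS_ADMIN))
                    return -EPERM;
            return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
    }

    static struct ctl_table example_table[] = {
            {
                    .procname     = "example_knob",
                    .data         = &example_knob,
                    .maxlen       = sizeof(int),
                    .mode         = 0600,
                    .proc_handler = proc_dointvec_minmax_privileged,
                    .extra1       = &zero,
                    .extra2       = &one,
            },
            { }
    };
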
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 000000000000..097a0f74e004
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,73 @@
1/* net/core/xdp.c
2 *
3 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
4 * Released under terms in GPL version 2. See COPYING.
5 */
6#include <linux/types.h>
7#include <linux/mm.h>
8
9#include <net/xdp.h>
10
11#define REG_STATE_NEW 0x0
12#define REG_STATE_REGISTERED 0x1
13#define REG_STATE_UNREGISTERED 0x2
14#define REG_STATE_UNUSED 0x3
15
16void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
17{
18 /* Simplify driver cleanup code paths, allow unreg "unused" */
19 if (xdp_rxq->reg_state == REG_STATE_UNUSED)
20 return;
21
22 WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
23
24 xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
25 xdp_rxq->dev = NULL;
26}
27EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
28
29static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
30{
31 memset(xdp_rxq, 0, sizeof(*xdp_rxq));
32}
33
34/* Returns 0 on success, negative on failure */
35int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
36 struct net_device *dev, u32 queue_index)
37{
38 if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
39 WARN(1, "Driver promised not to register this");
40 return -EINVAL;
41 }
42
43 if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
44 WARN(1, "Missing unregister, handled but fix driver");
45 xdp_rxq_info_unreg(xdp_rxq);
46 }
47
48 if (!dev) {
49 WARN(1, "Missing net_device from driver");
50 return -ENODEV;
51 }
52
53 /* State either UNREGISTERED or NEW */
54 xdp_rxq_info_init(xdp_rxq);
55 xdp_rxq->dev = dev;
56 xdp_rxq->queue_index = queue_index;
57
58 xdp_rxq->reg_state = REG_STATE_REGISTERED;
59 return 0;
60}
61EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
62
63void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
64{
65 xdp_rxq->reg_state = REG_STATE_UNUSED;
66}
67EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
68
69bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
70{
71 return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
72}
73EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
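
The new net/core/xdp.c gives drivers a small registration state machine for per-RX-queue XDP info. A hedged sketch of how a driver might wrap it around ring setup and teardown (the driver structure and function names are hypothetical):

    #include <net/xdp.h>

    struct my_rxq {                         /* hypothetical driver ring */
            struct xdp_rxq_info xdp_rxq;
    };

    static int my_rxq_setup(struct my_rxq *rxq, struct net_device *dev, u32 idx)
    {
            /* Must succeed before packets from this queue reach XDP. */
            return xdp_rxq_info_reg(&rxq->xdp_rxq, dev, idx);
    }

    static void my_rxq_teardown(struct my_rxq *rxq)
    {
            /* Safe even if the queue was flagged with xdp_rxq_info_unused(). */
            xdp_rxq_info_unreg(&rxq->xdp_rxq);
    }
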
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 8c0ef71bed2f..b270e84d9c13 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -39,23 +39,6 @@ config IP_DCCP_DEBUG
39 39
40 Just say N. 40 Just say N.
41 41
42config NET_DCCPPROBE
43 tristate "DCCP connection probing"
44 depends on PROC_FS && KPROBES
45 ---help---
46 This module allows for capturing the changes to DCCP connection
47 state in response to incoming packets. It is used for debugging
48 DCCP congestion avoidance modules. If you don't understand
49 what was just said, you don't need it: say N.
50
51 Documentation on how to use DCCP connection probing can be found
52 at:
53
54 http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe
55
56 To compile this code as a module, choose M here: the
57 module will be called dccp_probe.
58
59 42
60endmenu 43endmenu
61 44
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2e7b56097bc4..5b4ff37bc806 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -21,9 +21,10 @@ obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
21dccp_ipv6-y := ipv6.o 21dccp_ipv6-y := ipv6.o
22 22
23obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o 23obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
24obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
25 24
26dccp-$(CONFIG_SYSCTL) += sysctl.o 25dccp-$(CONFIG_SYSCTL) += sysctl.o
27 26
28dccp_diag-y := diag.o 27dccp_diag-y := diag.o
29dccp_probe-y := probe.o 28
29# build with local directory for trace.h
30CFLAGS_proto.o := -I$(src)
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 3de0d0362d7f..2a24f7d171a5 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -228,7 +228,7 @@ static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
228 } 228 }
229 229
230 if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { 230 if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
231 DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n"); 231 DCCP_CRIT("Ack Vector buffer overflow: dropping old entries");
232 av->av_overflow = true; 232 av->av_overflow = true;
233 } 233 }
234 234
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0c55ffb859bf..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
316 int flags, int *addr_len); 316 int flags, int *addr_len);
317void dccp_shutdown(struct sock *sk, int how); 317void dccp_shutdown(struct sock *sk, int how);
318int inet_dccp_listen(struct socket *sock, int backlog); 318int inet_dccp_listen(struct socket *sock, int backlog);
319unsigned int dccp_poll(struct file *file, struct socket *sock, 319__poll_t dccp_poll(struct file *file, struct socket *sock,
320 poll_table *wait); 320 poll_table *wait);
321int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); 321int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
322void dccp_req_err(struct sock *sk, u64 seq); 322void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 178bb9833311..37ccbe62eb1a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -63,9 +63,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
63 */ 63 */
64 local_bh_disable(); 64 local_bh_disable();
65 inet_twsk_schedule(tw, timeo); 65 inet_twsk_schedule(tw, timeo);
66 /* Linkage updates. */ 66 /* Linkage updates.
67 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); 67 * Note that access to tw after this point is illegal.
68 inet_twsk_put(tw); 68 */
69 inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
69 local_bh_enable(); 70 local_bh_enable();
70 } else { 71 } else {
71 /* Sorry, if we're out of memory, just CLOSE this 72 /* Sorry, if we're out of memory, just CLOSE this
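
The minisocks.c hunk reflects a reference transfer: inet_twsk_hashdance() now consumes the caller's timewait reference, which is why the inet_twsk_put() call disappears along with the double-underscore prefix. Side by side, condensed from the hunk:

            /* before: hashdance published tw but the caller kept its ref */
            __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
            inet_twsk_put(tw);

            /* after: hashdance consumes the ref; tw must not be touched again */
            inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
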
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
deleted file mode 100644
index 3d3fda05b32d..000000000000
--- a/net/dccp/probe.c
+++ /dev/null
@@ -1,203 +0,0 @@
1/*
2 * dccp_probe - Observe the DCCP flow with kprobes.
3 *
4 * The idea for this came from Werner Almesberger's umlsim
5 * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
6 *
7 * Modified for DCCP from Stephen Hemminger's code
8 * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25#include <linux/kernel.h>
26#include <linux/kprobes.h>
27#include <linux/socket.h>
28#include <linux/dccp.h>
29#include <linux/proc_fs.h>
30#include <linux/module.h>
31#include <linux/kfifo.h>
32#include <linux/vmalloc.h>
33#include <linux/time64.h>
34#include <linux/gfp.h>
35#include <net/net_namespace.h>
36
37#include "dccp.h"
38#include "ccid.h"
39#include "ccids/ccid3.h"
40
41static int port;
42
43static int bufsize = 64 * 1024;
44
45static const char procname[] = "dccpprobe";
46
47static struct {
48 struct kfifo fifo;
49 spinlock_t lock;
50 wait_queue_head_t wait;
51 struct timespec64 tstart;
52} dccpw;
53
54static void printl(const char *fmt, ...)
55{
56 va_list args;
57 int len;
58 struct timespec64 now;
59 char tbuf[256];
60
61 va_start(args, fmt);
62 getnstimeofday64(&now);
63
64 now = timespec64_sub(now, dccpw.tstart);
65
66 len = sprintf(tbuf, "%lu.%06lu ",
67 (unsigned long) now.tv_sec,
68 (unsigned long) now.tv_nsec / NSEC_PER_USEC);
69 len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
70 va_end(args);
71
72 kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
73 wake_up(&dccpw.wait);
74}
75
76static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
77{
78 const struct inet_sock *inet = inet_sk(sk);
79 struct ccid3_hc_tx_sock *hc = NULL;
80
81 if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
82 hc = ccid3_hc_tx_sk(sk);
83
84 if (port == 0 || ntohs(inet->inet_dport) == port ||
85 ntohs(inet->inet_sport) == port) {
86 if (hc)
87 printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n",
88 &inet->inet_saddr, ntohs(inet->inet_sport),
89 &inet->inet_daddr, ntohs(inet->inet_dport), size,
90 hc->tx_s, hc->tx_rtt, hc->tx_p,
91 hc->tx_x_calc, hc->tx_x_recv >> 6,
92 hc->tx_x >> 6, hc->tx_t_ipi);
93 else
94 printl("%pI4:%u %pI4:%u %d\n",
95 &inet->inet_saddr, ntohs(inet->inet_sport),
96 &inet->inet_daddr, ntohs(inet->inet_dport),
97 size);
98 }
99
100 jprobe_return();
101 return 0;
102}
103
104static struct jprobe dccp_send_probe = {
105 .kp = {
106 .symbol_name = "dccp_sendmsg",
107 },
108 .entry = jdccp_sendmsg,
109};
110
111static int dccpprobe_open(struct inode *inode, struct file *file)
112{
113 kfifo_reset(&dccpw.fifo);
114 getnstimeofday64(&dccpw.tstart);
115 return 0;
116}
117
118static ssize_t dccpprobe_read(struct file *file, char __user *buf,
119 size_t len, loff_t *ppos)
120{
121 int error = 0, cnt = 0;
122 unsigned char *tbuf;
123
124 if (!buf)
125 return -EINVAL;
126
127 if (len == 0)
128 return 0;
129
130 tbuf = vmalloc(len);
131 if (!tbuf)
132 return -ENOMEM;
133
134 error = wait_event_interruptible(dccpw.wait,
135 kfifo_len(&dccpw.fifo) != 0);
136 if (error)
137 goto out_free;
138
139 cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
140 error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
141
142out_free:
143 vfree(tbuf);
144
145 return error ? error : cnt;
146}
147
148static const struct file_operations dccpprobe_fops = {
149 .owner = THIS_MODULE,
150 .open = dccpprobe_open,
151 .read = dccpprobe_read,
152 .llseek = noop_llseek,
153};
154
155static __init int dccpprobe_init(void)
156{
157 int ret = -ENOMEM;
158
159 init_waitqueue_head(&dccpw.wait);
160 spin_lock_init(&dccpw.lock);
161 if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
162 return ret;
163 if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops))
164 goto err0;
165
166 ret = register_jprobe(&dccp_send_probe);
167 if (ret) {
168 ret = request_module("dccp");
169 if (!ret)
170 ret = register_jprobe(&dccp_send_probe);
171 }
172
173 if (ret)
174 goto err1;
175
176 pr_info("DCCP watch registered (port=%d)\n", port);
177 return 0;
178err1:
179 remove_proc_entry(procname, init_net.proc_net);
180err0:
181 kfifo_free(&dccpw.fifo);
182 return ret;
183}
184module_init(dccpprobe_init);
185
186static __exit void dccpprobe_exit(void)
187{
188 kfifo_free(&dccpw.fifo);
189 remove_proc_entry(procname, init_net.proc_net);
190 unregister_jprobe(&dccp_send_probe);
191
192}
193module_exit(dccpprobe_exit);
194
195MODULE_PARM_DESC(port, "Port to match (0=all)");
196module_param(port, int, 0);
197
198MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
199module_param(bufsize, int, 0);
200
201MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
202MODULE_DESCRIPTION("DCCP snooper");
203MODULE_LICENSE("GPL");
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9d43c1f40274..15bdc002d90c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -38,6 +38,9 @@
38#include "dccp.h" 38#include "dccp.h"
39#include "feat.h" 39#include "feat.h"
40 40
41#define CREATE_TRACE_POINTS
42#include "trace.h"
43
41DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; 44DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42 45
43EXPORT_SYMBOL_GPL(dccp_statistics); 46EXPORT_SYMBOL_GPL(dccp_statistics);
@@ -110,7 +113,7 @@ void dccp_set_state(struct sock *sk, const int state)
110 /* Change state AFTER socket is unhashed to avoid closed 113 /* Change state AFTER socket is unhashed to avoid closed
111 * socket sitting in hash tables. 114 * socket sitting in hash tables.
112 */ 115 */
113 sk->sk_state = state; 116 inet_sk_set_state(sk, state);
114} 117}
115 118
116EXPORT_SYMBOL_GPL(dccp_set_state); 119EXPORT_SYMBOL_GPL(dccp_set_state);
@@ -318,10 +321,10 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
318 * take care of normal races (between the test and the event) and we don't 321 * take care of normal races (between the test and the event) and we don't
319 * go look at any of the socket buffers directly. 322 * go look at any of the socket buffers directly.
320 */ 323 */
321unsigned int dccp_poll(struct file *file, struct socket *sock, 324__poll_t dccp_poll(struct file *file, struct socket *sock,
322 poll_table *wait) 325 poll_table *wait)
323{ 326{
324 unsigned int mask; 327 __poll_t mask;
325 struct sock *sk = sock->sk; 328 struct sock *sk = sock->sk;
326 329
327 sock_poll_wait(file, sk_sleep(sk), wait); 330 sock_poll_wait(file, sk_sleep(sk), wait);
@@ -335,21 +338,21 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
335 338
336 mask = 0; 339 mask = 0;
337 if (sk->sk_err) 340 if (sk->sk_err)
338 mask = POLLERR; 341 mask = EPOLLERR;
339 342
340 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) 343 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
341 mask |= POLLHUP; 344 mask |= EPOLLHUP;
342 if (sk->sk_shutdown & RCV_SHUTDOWN) 345 if (sk->sk_shutdown & RCV_SHUTDOWN)
343 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 346 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
344 347
345 /* Connected? */ 348 /* Connected? */
346 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { 349 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
347 if (atomic_read(&sk->sk_rmem_alloc) > 0) 350 if (atomic_read(&sk->sk_rmem_alloc) > 0)
348 mask |= POLLIN | POLLRDNORM; 351 mask |= EPOLLIN | EPOLLRDNORM;
349 352
350 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 353 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
351 if (sk_stream_is_writeable(sk)) { 354 if (sk_stream_is_writeable(sk)) {
352 mask |= POLLOUT | POLLWRNORM; 355 mask |= EPOLLOUT | EPOLLWRNORM;
353 } else { /* send SIGIO later */ 356 } else { /* send SIGIO later */
354 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 357 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
355 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 358 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -359,7 +362,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
359 * IO signal will be lost. 362 * IO signal will be lost.
360 */ 363 */
361 if (sk_stream_is_writeable(sk)) 364 if (sk_stream_is_writeable(sk))
362 mask |= POLLOUT | POLLWRNORM; 365 mask |= EPOLLOUT | EPOLLWRNORM;
363 } 366 }
364 } 367 }
365 } 368 }
@@ -761,6 +764,8 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
761 int rc, size; 764 int rc, size;
762 long timeo; 765 long timeo;
763 766
767 trace_dccp_probe(sk, len);
768
764 if (len > dp->dccps_mss_cache) 769 if (len > dp->dccps_mss_cache)
765 return -EMSGSIZE; 770 return -EMSGSIZE;
766 771
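
dccp_poll() above is part of the tree-wide move from unsigned int/POLL* to the sparse-checkable __poll_t/EPOLL* types. The converted shape of a poll method, condensed from the hunk (a sketch, not a drop-in replacement):

    static __poll_t example_poll(struct file *file, struct socket *sock,
                                 poll_table *wait)
    {
            struct sock *sk = sock->sk;
            __poll_t mask = 0;

            sock_poll_wait(file, sk_sleep(sk), wait);
            if (sk->sk_err)
                    mask = EPOLLERR;
            if (sk->sk_shutdown & RCV_SHUTDOWN)
                    mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
            return mask;
    }
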
diff --git a/net/dccp/trace.h b/net/dccp/trace.h
new file mode 100644
index 000000000000..5062421beee9
--- /dev/null
+++ b/net/dccp/trace.h
@@ -0,0 +1,84 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#undef TRACE_SYSTEM
3#define TRACE_SYSTEM dccp
4
5#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ)
6#define _TRACE_DCCP_H
7
8#include <net/sock.h>
9#include "dccp.h"
10#include "ccids/ccid3.h"
11#include <linux/tracepoint.h>
12#include <trace/events/net_probe_common.h>
13
14TRACE_EVENT(dccp_probe,
15
16 TP_PROTO(struct sock *sk, size_t size),
17
18 TP_ARGS(sk, size),
19
20 TP_STRUCT__entry(
21 /* sockaddr_in6 is always bigger than sockaddr_in */
22 __array(__u8, saddr, sizeof(struct sockaddr_in6))
23 __array(__u8, daddr, sizeof(struct sockaddr_in6))
24 __field(__u16, sport)
25 __field(__u16, dport)
26 __field(__u16, size)
27 __field(__u16, tx_s)
28 __field(__u32, tx_rtt)
29 __field(__u32, tx_p)
30 __field(__u32, tx_x_calc)
31 __field(__u64, tx_x_recv)
32 __field(__u64, tx_x)
33 __field(__u32, tx_t_ipi)
34 ),
35
36 TP_fast_assign(
37 const struct inet_sock *inet = inet_sk(sk);
38 struct ccid3_hc_tx_sock *hc = NULL;
39
40 if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
41 hc = ccid3_hc_tx_sk(sk);
42
43 memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
44 memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
45
46 TP_STORE_ADDR_PORTS(__entry, inet, sk);
47
48 /* For filtering use */
49 __entry->sport = ntohs(inet->inet_sport);
50 __entry->dport = ntohs(inet->inet_dport);
51
52 __entry->size = size;
53 if (hc) {
54 __entry->tx_s = hc->tx_s;
55 __entry->tx_rtt = hc->tx_rtt;
56 __entry->tx_p = hc->tx_p;
57 __entry->tx_x_calc = hc->tx_x_calc;
58 __entry->tx_x_recv = hc->tx_x_recv >> 6;
59 __entry->tx_x = hc->tx_x >> 6;
60 __entry->tx_t_ipi = hc->tx_t_ipi;
61 } else {
62 __entry->tx_s = 0;
63 memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi -
64 (void *)&__entry->tx_rtt +
65 sizeof(__entry->tx_t_ipi));
66 }
67 ),
68
69 TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d "
70 "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d",
71 __entry->saddr, __entry->daddr, __entry->size,
72 __entry->tx_s, __entry->tx_rtt, __entry->tx_p,
73 __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x,
74 __entry->tx_t_ipi)
75);
76
77#endif /* _TRACE_TCP_H */
78
79/* This part must be outside protection */
80#undef TRACE_INCLUDE_PATH
81#define TRACE_INCLUDE_PATH .
82#undef TRACE_INCLUDE_FILE
83#define TRACE_INCLUDE_FILE trace
84#include <trace/define_trace.h>
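
trace.h above is only half of the tracepoint: exactly one translation unit must define CREATE_TRACE_POINTS before including it so the event bodies are emitted once, which is what the proto.c hunk earlier does; every other site just fires the event. In outline:

            /* one .c file expands the event bodies (see proto.c above) */
            #define CREATE_TRACE_POINTS
            #include "trace.h"

            /* call sites simply fire the event */
            trace_dccp_probe(sk, len);
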
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 518cea17b811..91dd09f79808 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1209,14 +1209,14 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
1209} 1209}
1210 1210
1211 1211
1212static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table *wait) 1212static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
1213{ 1213{
1214 struct sock *sk = sock->sk; 1214 struct sock *sk = sock->sk;
1215 struct dn_scp *scp = DN_SK(sk); 1215 struct dn_scp *scp = DN_SK(sk);
1216 int mask = datagram_poll(file, sock, wait); 1216 __poll_t mask = datagram_poll(file, sock, wait);
1217 1217
1218 if (!skb_queue_empty(&scp->other_receive_queue)) 1218 if (!skb_queue_empty(&scp->other_receive_queue))
1219 mask |= POLLRDBAND; 1219 mask |= EPOLLRDBAND;
1220 1220
1221 return mask; 1221 return mask;
1222} 1222}
@@ -2320,7 +2320,6 @@ static int dn_socket_seq_open(struct inode *inode, struct file *file)
2320} 2320}
2321 2321
2322static const struct file_operations dn_socket_seq_fops = { 2322static const struct file_operations dn_socket_seq_fops = {
2323 .owner = THIS_MODULE,
2324 .open = dn_socket_seq_open, 2323 .open = dn_socket_seq_open,
2325 .read = seq_read, 2324 .read = seq_read,
2326 .llseek = seq_lseek, 2325 .llseek = seq_lseek,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 9153247dad28..c9f5e1ebb9c8 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1389,7 +1389,6 @@ static int dn_dev_seq_open(struct inode *inode, struct file *file)
1389} 1389}
1390 1390
1391static const struct file_operations dn_dev_seq_fops = { 1391static const struct file_operations dn_dev_seq_fops = {
1392 .owner = THIS_MODULE,
1393 .open = dn_dev_seq_open, 1392 .open = dn_dev_seq_open,
1394 .read = seq_read, 1393 .read = seq_read,
1395 .llseek = seq_lseek, 1394 .llseek = seq_lseek,
@@ -1418,9 +1417,12 @@ void __init dn_dev_init(void)
1418 1417
1419 dn_dev_devices_on(); 1418 dn_dev_devices_on();
1420 1419
1421 rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0); 1420 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
1422 rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0); 1421 dn_nl_newaddr, NULL, 0);
1423 rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); 1422 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
1423 dn_nl_deladdr, NULL, 0);
1424 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
1425 NULL, dn_nl_dump_ifaddr, 0);
1424 1426
1425 proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); 1427 proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
1426 1428
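
These DECnet hunks (and the dn_fib.c/dn_route.c ones below) switch to rtnl_register_module(), which, unlike rtnl_register(), takes the owning module and returns an error the caller could unwind on. A hedged sketch of checked registration from a module init path (the handler name is hypothetical):

    static int __init my_proto_init(void)
    {
            int err;

            err = rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
                                       my_newaddr_doit, NULL, 0);
            if (err)
                    return err;     /* module load fails cleanly */
            return 0;
    }
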
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index b37a1b833c77..fce94cbd4378 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -792,8 +792,10 @@ void __init dn_fib_init(void)
792 792
793 register_dnaddr_notifier(&dn_fib_dnaddr_notifier); 793 register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
794 794
795 rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0); 795 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
796 rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0); 796 dn_fib_rtm_newroute, NULL, 0);
797 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
798 dn_fib_rtm_delroute, NULL, 0);
797} 799}
798 800
799 801
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 528119a5618e..6e37d9e6345e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -597,7 +597,6 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file)
597} 597}
598 598
599static const struct file_operations dn_neigh_seq_fops = { 599static const struct file_operations dn_neigh_seq_fops = {
600 .owner = THIS_MODULE,
601 .open = dn_neigh_seq_open, 600 .open = dn_neigh_seq_open,
602 .read = seq_read, 601 .read = seq_read,
603 .llseek = seq_lseek, 602 .llseek = seq_lseek,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 324cb9f2f551..ef20b8e31669 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -199,11 +199,11 @@ static void dn_dst_check_expire(struct timer_list *unused)
199 lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { 199 lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
200 if (atomic_read(&rt->dst.__refcnt) > 1 || 200 if (atomic_read(&rt->dst.__refcnt) > 1 ||
201 (now - rt->dst.lastuse) < expire) { 201 (now - rt->dst.lastuse) < expire) {
202 rtp = &rt->dst.dn_next; 202 rtp = &rt->dn_next;
203 continue; 203 continue;
204 } 204 }
205 *rtp = rt->dst.dn_next; 205 *rtp = rt->dn_next;
206 rt->dst.dn_next = NULL; 206 rt->dn_next = NULL;
207 dst_dev_put(&rt->dst); 207 dst_dev_put(&rt->dst);
208 dst_release(&rt->dst); 208 dst_release(&rt->dst);
209 } 209 }
@@ -233,11 +233,11 @@ static int dn_dst_gc(struct dst_ops *ops)
233 lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { 233 lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
234 if (atomic_read(&rt->dst.__refcnt) > 1 || 234 if (atomic_read(&rt->dst.__refcnt) > 1 ||
235 (now - rt->dst.lastuse) < expire) { 235 (now - rt->dst.lastuse) < expire) {
236 rtp = &rt->dst.dn_next; 236 rtp = &rt->dn_next;
237 continue; 237 continue;
238 } 238 }
239 *rtp = rt->dst.dn_next; 239 *rtp = rt->dn_next;
240 rt->dst.dn_next = NULL; 240 rt->dn_next = NULL;
241 dst_dev_put(&rt->dst); 241 dst_dev_put(&rt->dst);
242 dst_release(&rt->dst); 242 dst_release(&rt->dst);
243 break; 243 break;
@@ -333,8 +333,8 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
333 lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { 333 lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
334 if (compare_keys(&rth->fld, &rt->fld)) { 334 if (compare_keys(&rth->fld, &rt->fld)) {
335 /* Put it first */ 335 /* Put it first */
336 *rthp = rth->dst.dn_next; 336 *rthp = rth->dn_next;
337 rcu_assign_pointer(rth->dst.dn_next, 337 rcu_assign_pointer(rth->dn_next,
338 dn_rt_hash_table[hash].chain); 338 dn_rt_hash_table[hash].chain);
339 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 339 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
340 340
@@ -345,10 +345,10 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
345 *rp = rth; 345 *rp = rth;
346 return 0; 346 return 0;
347 } 347 }
348 rthp = &rth->dst.dn_next; 348 rthp = &rth->dn_next;
349 } 349 }
350 350
351 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); 351 rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
352 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 352 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
353 353
354 dst_hold_and_use(&rt->dst, now); 354 dst_hold_and_use(&rt->dst, now);
@@ -369,8 +369,8 @@ static void dn_run_flush(struct timer_list *unused)
369 goto nothing_to_declare; 369 goto nothing_to_declare;
370 370
371 for(; rt; rt = next) { 371 for(; rt; rt = next) {
372 next = rcu_dereference_raw(rt->dst.dn_next); 372 next = rcu_dereference_raw(rt->dn_next);
373 RCU_INIT_POINTER(rt->dst.dn_next, NULL); 373 RCU_INIT_POINTER(rt->dn_next, NULL);
374 dst_dev_put(&rt->dst); 374 dst_dev_put(&rt->dst);
375 dst_release(&rt->dst); 375 dst_release(&rt->dst);
376 } 376 }
@@ -1183,6 +1183,7 @@ make_route:
1183 if (rt == NULL) 1183 if (rt == NULL)
1184 goto e_nobufs; 1184 goto e_nobufs;
1185 1185
1186 rt->dn_next = NULL;
1186 memset(&rt->fld, 0, sizeof(rt->fld)); 1187 memset(&rt->fld, 0, sizeof(rt->fld));
1187 rt->fld.saddr = oldflp->saddr; 1188 rt->fld.saddr = oldflp->saddr;
1188 rt->fld.daddr = oldflp->daddr; 1189 rt->fld.daddr = oldflp->daddr;
@@ -1252,7 +1253,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
1252 if (!(flags & MSG_TRYHARD)) { 1253 if (!(flags & MSG_TRYHARD)) {
1253 rcu_read_lock_bh(); 1254 rcu_read_lock_bh();
1254 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; 1255 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
1255 rt = rcu_dereference_bh(rt->dst.dn_next)) { 1256 rt = rcu_dereference_bh(rt->dn_next)) {
1256 if ((flp->daddr == rt->fld.daddr) && 1257 if ((flp->daddr == rt->fld.daddr) &&
1257 (flp->saddr == rt->fld.saddr) && 1258 (flp->saddr == rt->fld.saddr) &&
1258 (flp->flowidn_mark == rt->fld.flowidn_mark) && 1259 (flp->flowidn_mark == rt->fld.flowidn_mark) &&
@@ -1448,6 +1449,7 @@ make_route:
1448 if (rt == NULL) 1449 if (rt == NULL)
1449 goto e_nobufs; 1450 goto e_nobufs;
1450 1451
1452 rt->dn_next = NULL;
1451 memset(&rt->fld, 0, sizeof(rt->fld)); 1453 memset(&rt->fld, 0, sizeof(rt->fld));
1452 rt->rt_saddr = fld.saddr; 1454 rt->rt_saddr = fld.saddr;
1453 rt->rt_daddr = fld.daddr; 1455 rt->rt_daddr = fld.daddr;
@@ -1529,7 +1531,7 @@ static int dn_route_input(struct sk_buff *skb)
1529 1531
1530 rcu_read_lock(); 1532 rcu_read_lock();
1531 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; 1533 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
1532 rt = rcu_dereference(rt->dst.dn_next)) { 1534 rt = rcu_dereference(rt->dn_next)) {
1533 if ((rt->fld.saddr == cb->src) && 1535 if ((rt->fld.saddr == cb->src) &&
1534 (rt->fld.daddr == cb->dst) && 1536 (rt->fld.daddr == cb->dst) &&
1535 (rt->fld.flowidn_oif == 0) && 1537 (rt->fld.flowidn_oif == 0) &&
@@ -1749,7 +1751,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
1749 rcu_read_lock_bh(); 1751 rcu_read_lock_bh();
1750 for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; 1752 for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
1751 rt; 1753 rt;
1752 rt = rcu_dereference_bh(rt->dst.dn_next), idx++) { 1754 rt = rcu_dereference_bh(rt->dn_next), idx++) {
1753 if (idx < s_idx) 1755 if (idx < s_idx)
1754 continue; 1756 continue;
1755 skb_dst_set(skb, dst_clone(&rt->dst)); 1757 skb_dst_set(skb, dst_clone(&rt->dst));
@@ -1795,7 +1797,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
1795{ 1797{
1796 struct dn_rt_cache_iter_state *s = seq->private; 1798 struct dn_rt_cache_iter_state *s = seq->private;
1797 1799
1798 rt = rcu_dereference_bh(rt->dst.dn_next); 1800 rt = rcu_dereference_bh(rt->dn_next);
1799 while (!rt) { 1801 while (!rt) {
1800 rcu_read_unlock_bh(); 1802 rcu_read_unlock_bh();
1801 if (--s->bucket < 0) 1803 if (--s->bucket < 0)
@@ -1858,7 +1860,6 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
1858} 1860}
1859 1861
1860static const struct file_operations dn_rt_cache_seq_fops = { 1862static const struct file_operations dn_rt_cache_seq_fops = {
1861 .owner = THIS_MODULE,
1862 .open = dn_rt_cache_seq_open, 1863 .open = dn_rt_cache_seq_open,
1863 .read = seq_read, 1864 .read = seq_read,
1864 .llseek = seq_lseek, 1865 .llseek = seq_lseek,
@@ -1921,11 +1922,11 @@ void __init dn_route_init(void)
1921 &dn_rt_cache_seq_fops); 1922 &dn_rt_cache_seq_fops);
1922 1923
1923#ifdef CONFIG_DECNET_ROUTER 1924#ifdef CONFIG_DECNET_ROUTER
1924 rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, 1925 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
1925 dn_fib_dump, 0); 1926 dn_cache_getroute, dn_fib_dump, 0);
1926#else 1927#else
1927 rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, 1928 rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
1928 dn_cache_dump, 0); 1929 dn_cache_getroute, dn_cache_dump, 0);
1929#endif 1930#endif
1930} 1931}
1931 1932
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index af781010753b..49da67034f29 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -52,11 +52,11 @@
52 * @name: Name to look up 52 * @name: Name to look up
53 * @namelen: Length of name 53 * @namelen: Length of name
54 * @options: Request options (or NULL if no options) 54 * @options: Request options (or NULL if no options)
55 * @_result: Where to place the returned data. 55 * @_result: Where to place the returned data (or NULL)
56 * @_expiry: Where to store the result expiry time (or NULL) 56 * @_expiry: Where to store the result expiry time (or NULL)
57 * 57 *
58 * The data will be returned in the pointer at *result, and the caller is 58 * The data will be returned in the pointer at *result, if provided, and the
59 * responsible for freeing it. 59 * caller is responsible for freeing it.
60 * 60 *
61 * The description should be of the form "[<query_type>:]<domain_name>", and 61 * The description should be of the form "[<query_type>:]<domain_name>", and
62 * the options need to be appropriate for the query type requested. If no 62 * the options need to be appropriate for the query type requested. If no
@@ -81,7 +81,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
81 kenter("%s,%*.*s,%zu,%s", 81 kenter("%s,%*.*s,%zu,%s",
82 type, (int)namelen, (int)namelen, name, namelen, options); 82 type, (int)namelen, (int)namelen, name, namelen, options);
83 83
84 if (!name || namelen == 0 || !_result) 84 if (!name || namelen == 0)
85 return -EINVAL; 85 return -EINVAL;
86 86
87 /* construct the query key description as "[<type>:]<name>" */ 87 /* construct the query key description as "[<type>:]<name>" */
@@ -146,13 +146,15 @@ int dns_query(const char *type, const char *name, size_t namelen,
146 upayload = user_key_payload_locked(rkey); 146 upayload = user_key_payload_locked(rkey);
147 len = upayload->datalen; 147 len = upayload->datalen;
148 148
149 ret = -ENOMEM; 149 if (_result) {
150 *_result = kmalloc(len + 1, GFP_KERNEL); 150 ret = -ENOMEM;
151 if (!*_result) 151 *_result = kmalloc(len + 1, GFP_KERNEL);
152 goto put; 152 if (!*_result)
153 goto put;
153 154
154 memcpy(*_result, upayload->data, len); 155 memcpy(*_result, upayload->data, len);
155 (*_result)[len] = '\0'; 156 (*_result)[len] = '\0';
157 }
156 158
157 if (_expiry) 159 if (_expiry)
158 *_expiry = rkey->expiry; 160 *_expiry = rkey->expiry;
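
After this hunk, dns_query() treats _result as optional: a caller that only wants to know whether the name resolves, or when the cached key expires, can pass NULL and skip the allocation. A hedged usage sketch, assuming the 4.16-era signature from include/linux/dns_resolver.h:

    #include <linux/dns_resolver.h>

    static int example_check(void)
    {
            time64_t expiry;
            int ret;

            /* NULL _result: prime/inspect the cache without copying data */
            ret = dns_query(NULL, "example.com", 11, NULL, NULL, &expiry);
            return ret < 0 ? ret : 0;
    }
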
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 03c3bdf25468..bbf2c82cf7b2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -16,6 +16,15 @@ config NET_DSA
16 16
17if NET_DSA 17if NET_DSA
18 18
19config NET_DSA_LEGACY
20 bool "Support for older platform device and Device Tree registration"
21 default y
22 ---help---
23 Say Y if you want to enable support for the older platform device and
24 deprecated Device Tree binding registration.
25
26 This feature is scheduled for removal in 4.17.
27
19# tagging formats 28# tagging formats
20config NET_DSA_TAG_BRCM 29config NET_DSA_TAG_BRCM
21 bool 30 bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0e13c1f95d13..9e4d3536f977 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,7 +1,8 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2# the core 2# the core
3obj-$(CONFIG_NET_DSA) += dsa_core.o 3obj-$(CONFIG_NET_DSA) += dsa_core.o
4dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o 4dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
5dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o
5 6
6# tagging formats 7# tagging formats
7dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o 8dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 1e287420ff49..adf50fbc4c13 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -241,7 +241,7 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
241 for (port = 0; port < ds->num_ports; port++) { 241 for (port = 0; port < ds->num_ports; port++) {
242 dp = &ds->ports[port]; 242 dp = &ds->ports[port];
243 243
244 if (dsa_port_is_user(dp)) 244 if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
245 dp->cpu_dp = dst->cpu_dp; 245 dp->cpu_dp = dst->cpu_dp;
246 } 246 }
247 } 247 }
@@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp)
271 break; 271 break;
272 case DSA_PORT_TYPE_CPU: 272 case DSA_PORT_TYPE_CPU:
273 case DSA_PORT_TYPE_DSA: 273 case DSA_PORT_TYPE_DSA:
274 err = dsa_port_fixed_link_register_of(dp); 274 err = dsa_port_link_register_of(dp);
275 if (err) { 275 if (err) {
276 dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", 276 dev_err(ds->dev, "failed to setup link for port %d.%d\n",
277 ds->index, dp->index); 277 ds->index, dp->index);
278 return err; 278 return err;
279 } 279 }
280
281 break; 280 break;
282 case DSA_PORT_TYPE_USER: 281 case DSA_PORT_TYPE_USER:
283 err = dsa_slave_create(dp); 282 err = dsa_slave_create(dp);
@@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
301 break; 300 break;
302 case DSA_PORT_TYPE_CPU: 301 case DSA_PORT_TYPE_CPU:
303 case DSA_PORT_TYPE_DSA: 302 case DSA_PORT_TYPE_DSA:
304 dsa_port_fixed_link_unregister_of(dp); 303 dsa_port_link_unregister_of(dp);
305 break; 304 break;
306 case DSA_PORT_TYPE_USER: 305 case DSA_PORT_TYPE_USER:
307 if (dp->slave) { 306 if (dp->slave) {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7d036696e8c4..70de7895e5b8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -97,8 +97,17 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
97bool dsa_schedule_work(struct work_struct *work); 97bool dsa_schedule_work(struct work_struct *work);
98 98
99/* legacy.c */ 99/* legacy.c */
100#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
100int dsa_legacy_register(void); 101int dsa_legacy_register(void);
101void dsa_legacy_unregister(void); 102void dsa_legacy_unregister(void);
103#else
104static inline int dsa_legacy_register(void)
105{
106 return 0;
107}
108
109static inline void dsa_legacy_unregister(void) { }
110#endif
102int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 111int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
103 struct net_device *dev, 112 struct net_device *dev,
104 const unsigned char *addr, u16 vid, 113 const unsigned char *addr, u16 vid,
@@ -157,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp,
157 struct switchdev_trans *trans); 166 struct switchdev_trans *trans);
158int dsa_port_vlan_del(struct dsa_port *dp, 167int dsa_port_vlan_del(struct dsa_port *dp,
159 const struct switchdev_obj_port_vlan *vlan); 168 const struct switchdev_obj_port_vlan *vlan);
160int dsa_port_fixed_link_register_of(struct dsa_port *dp); 169int dsa_port_link_register_of(struct dsa_port *dp);
161void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); 170void dsa_port_link_unregister_of(struct dsa_port *dp);
162 171
163/* slave.c */ 172/* slave.c */
164extern const struct dsa_device_ops notag_netdev_ops; 173extern const struct dsa_device_ops notag_netdev_ops;
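
The dsa_priv.h hunk uses the standard kernel idiom for compile-time-optional code: real prototypes when CONFIG_NET_DSA_LEGACY is set, static inline no-ops otherwise, so dsa.c can call dsa_legacy_register() unconditionally. The pattern in the abstract (CONFIG_FOO and the foo_* names are placeholders):

    #if IS_ENABLED(CONFIG_FOO)
    int foo_register(void);
    void foo_unregister(void);
    #else
    static inline int foo_register(void)
    {
            return 0;               /* report success when compiled out */
    }

    static inline void foo_unregister(void) { }
    #endif
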
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 84611d7fcfa2..cb54b81d0bd9 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
86 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 86 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
87 continue; 87 continue;
88 88
89 ret = dsa_port_fixed_link_register_of(&ds->ports[port]); 89 ret = dsa_port_link_register_of(&ds->ports[port]);
90 if (ret) 90 if (ret)
91 return ret; 91 return ret;
92 } 92 }
@@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
275 for (port = 0; port < ds->num_ports; port++) { 275 for (port = 0; port < ds->num_ports; port++) {
276 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 276 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
277 continue; 277 continue;
278 dsa_port_fixed_link_unregister_of(&ds->ports[port]); 278 dsa_port_link_unregister_of(&ds->ports[port]);
279 } 279 }
280 280
281 if (ds->slave_mii_bus && ds->ops->phy_read) 281 if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -718,26 +718,6 @@ static int dsa_resume(struct device *d)
718} 718}
719#endif 719#endif
720 720
721/* legacy way, bypassing the bridge *****************************************/
722int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
723 struct net_device *dev,
724 const unsigned char *addr, u16 vid,
725 u16 flags)
726{
727 struct dsa_port *dp = dsa_slave_to_port(dev);
728
729 return dsa_port_fdb_add(dp, addr, vid);
730}
731
732int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
733 struct net_device *dev,
734 const unsigned char *addr, u16 vid)
735{
736 struct dsa_port *dp = dsa_slave_to_port(dev);
737
738 return dsa_port_fdb_del(dp, addr, vid);
739}
740
741static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); 721static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
742 722
743static const struct of_device_id dsa_of_match_table[] = { 723static const struct of_device_id dsa_of_match_table[] = {
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb4be2679904..7acc1169d75e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp,
273 return 0; 273 return 0;
274} 274}
275 275
276int dsa_port_fixed_link_register_of(struct dsa_port *dp) 276static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
277{
278 struct device_node *port_dn = dp->dn;
279 struct device_node *phy_dn;
280 struct dsa_switch *ds = dp->ds;
281 struct phy_device *phydev;
282 int port = dp->index;
283 int err = 0;
284
285 phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
286 if (!phy_dn)
287 return 0;
288
289 phydev = of_phy_find_device(phy_dn);
290 if (!phydev) {
291 err = -EPROBE_DEFER;
292 goto err_put_of;
293 }
294
295 if (enable) {
296 err = genphy_config_init(phydev);
297 if (err < 0)
298 goto err_put_dev;
299
300 err = genphy_resume(phydev);
301 if (err < 0)
302 goto err_put_dev;
303
304 err = genphy_read_status(phydev);
305 if (err < 0)
306 goto err_put_dev;
307 } else {
308 err = genphy_suspend(phydev);
309 if (err < 0)
310 goto err_put_dev;
311 }
312
313 if (ds->ops->adjust_link)
314 ds->ops->adjust_link(ds, port, phydev);
315
316 dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
317
318err_put_dev:
319 put_device(&phydev->mdio.dev);
320err_put_of:
321 of_node_put(phy_dn);
322 return err;
323}
324
325static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
277{ 326{
278 struct device_node *dn = dp->dn; 327 struct device_node *dn = dp->dn;
279 struct dsa_switch *ds = dp->ds; 328 struct dsa_switch *ds = dp->ds;
@@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp)
282 int mode; 331 int mode;
283 int err; 332 int err;
284 333
285 if (of_phy_is_fixed_link(dn)) { 334 err = of_phy_register_fixed_link(dn);
286 err = of_phy_register_fixed_link(dn); 335 if (err) {
287 if (err) { 336 dev_err(ds->dev,
288 dev_err(ds->dev, 337 "failed to register the fixed PHY of port %d\n",
289 "failed to register the fixed PHY of port %d\n", 338 port);
290 port); 339 return err;
291 return err; 340 }
292 }
293 341
294 phydev = of_phy_find_device(dn); 342 phydev = of_phy_find_device(dn);
295 343
296 mode = of_get_phy_mode(dn); 344 mode = of_get_phy_mode(dn);
297 if (mode < 0) 345 if (mode < 0)
298 mode = PHY_INTERFACE_MODE_NA; 346 mode = PHY_INTERFACE_MODE_NA;
299 phydev->interface = mode; 347 phydev->interface = mode;
300 348
301 genphy_config_init(phydev); 349 genphy_config_init(phydev);
302 genphy_read_status(phydev); 350 genphy_read_status(phydev);
303 351
304 if (ds->ops->adjust_link) 352 if (ds->ops->adjust_link)
305 ds->ops->adjust_link(ds, port, phydev); 353 ds->ops->adjust_link(ds, port, phydev);
306 354
307 put_device(&phydev->mdio.dev); 355 put_device(&phydev->mdio.dev);
308 }
309 356
310 return 0; 357 return 0;
311} 358}
312 359
313void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) 360int dsa_port_link_register_of(struct dsa_port *dp)
314{ 361{
315 struct device_node *dn = dp->dn; 362 if (of_phy_is_fixed_link(dp->dn))
363 return dsa_port_fixed_link_register_of(dp);
364 else
365 return dsa_port_setup_phy_of(dp, true);
366}
316 367
317 if (of_phy_is_fixed_link(dn)) 368void dsa_port_link_unregister_of(struct dsa_port *dp)
318 of_phy_deregister_fixed_link(dn); 369{
370 if (of_phy_is_fixed_link(dp->dn))
371 of_phy_deregister_fixed_link(dp->dn);
372 else
373 dsa_port_setup_phy_of(dp, false);
319} 374}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a95a55f79137..f52307296de4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -708,14 +708,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
708 struct dsa_slave_priv *p = netdev_priv(dev); 708 struct dsa_slave_priv *p = netdev_priv(dev);
709 struct dsa_mall_tc_entry *mall_tc_entry; 709 struct dsa_mall_tc_entry *mall_tc_entry;
710 __be16 protocol = cls->common.protocol; 710 __be16 protocol = cls->common.protocol;
711 struct net *net = dev_net(dev);
712 struct dsa_switch *ds = dp->ds; 711 struct dsa_switch *ds = dp->ds;
713 struct net_device *to_dev; 712 struct net_device *to_dev;
714 const struct tc_action *a; 713 const struct tc_action *a;
715 struct dsa_port *to_dp; 714 struct dsa_port *to_dp;
716 int err = -EOPNOTSUPP; 715 int err = -EOPNOTSUPP;
717 LIST_HEAD(actions); 716 LIST_HEAD(actions);
718 int ifindex;
719 717
720 if (!ds->ops->port_mirror_add) 718 if (!ds->ops->port_mirror_add)
721 return err; 719 return err;
@@ -729,8 +727,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
729 if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { 727 if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
730 struct dsa_mall_mirror_tc_entry *mirror; 728 struct dsa_mall_mirror_tc_entry *mirror;
731 729
732 ifindex = tcf_mirred_ifindex(a); 730 to_dev = tcf_mirred_dev(a);
733 to_dev = __dev_get_by_index(net, ifindex);
734 if (!to_dev) 731 if (!to_dev)
735 return -EINVAL; 732 return -EINVAL;
736 733
@@ -943,6 +940,26 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
943 .set_rxnfc = dsa_slave_set_rxnfc, 940 .set_rxnfc = dsa_slave_set_rxnfc,
944}; 941};
945 942
943/* legacy way, bypassing the bridge *****************************************/
944int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
945 struct net_device *dev,
946 const unsigned char *addr, u16 vid,
947 u16 flags)
948{
949 struct dsa_port *dp = dsa_slave_to_port(dev);
950
951 return dsa_port_fdb_add(dp, addr, vid);
952}
953
954int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
955 struct net_device *dev,
956 const unsigned char *addr, u16 vid)
957{
958 struct dsa_port *dp = dsa_slave_to_port(dev);
959
960 return dsa_port_fdb_del(dp, addr, vid);
961}
962
946static const struct net_device_ops dsa_slave_netdev_ops = { 963static const struct net_device_ops dsa_slave_netdev_ops = {
947 .ndo_open = dsa_slave_open, 964 .ndo_open = dsa_slave_open,
948 .ndo_stop = dsa_slave_close, 965 .ndo_stop = dsa_slave_close,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 29608d087a7c..b93511726069 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,29 +83,52 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
83static int dsa_switch_fdb_add(struct dsa_switch *ds, 83static int dsa_switch_fdb_add(struct dsa_switch *ds,
84 struct dsa_notifier_fdb_info *info) 84 struct dsa_notifier_fdb_info *info)
85{ 85{
86 /* Do not care yet about other switch chips of the fabric */ 86 int port = dsa_towards_port(ds, info->sw_index, info->port);
87 if (ds->index != info->sw_index)
88 return 0;
89 87
90 if (!ds->ops->port_fdb_add) 88 if (!ds->ops->port_fdb_add)
91 return -EOPNOTSUPP; 89 return -EOPNOTSUPP;
92 90
93 return ds->ops->port_fdb_add(ds, info->port, info->addr, 91 return ds->ops->port_fdb_add(ds, port, info->addr, info->vid);
94 info->vid);
95} 92}
96 93
97static int dsa_switch_fdb_del(struct dsa_switch *ds, 94static int dsa_switch_fdb_del(struct dsa_switch *ds,
98 struct dsa_notifier_fdb_info *info) 95 struct dsa_notifier_fdb_info *info)
99{ 96{
100 /* Do not care yet about other switch chips of the fabric */ 97 int port = dsa_towards_port(ds, info->sw_index, info->port);
101 if (ds->index != info->sw_index)
102 return 0;
103 98
104 if (!ds->ops->port_fdb_del) 99 if (!ds->ops->port_fdb_del)
105 return -EOPNOTSUPP; 100 return -EOPNOTSUPP;
106 101
107 return ds->ops->port_fdb_del(ds, info->port, info->addr, 102 return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
108 info->vid); 103}
104
105static int
106dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
107 const struct switchdev_obj_port_mdb *mdb,
108 const unsigned long *bitmap)
109{
110 int port, err;
111
112 if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
113 return -EOPNOTSUPP;
114
115 for_each_set_bit(port, bitmap, ds->num_ports) {
116 err = ds->ops->port_mdb_prepare(ds, port, mdb);
117 if (err)
118 return err;
119 }
120
121 return 0;
122}
123
124static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
125 const struct switchdev_obj_port_mdb *mdb,
126 const unsigned long *bitmap)
127{
128 int port;
129
130 for_each_set_bit(port, bitmap, ds->num_ports)
131 ds->ops->port_mdb_add(ds, port, mdb);
109} 132}
110 133
111static int dsa_switch_mdb_add(struct dsa_switch *ds, 134static int dsa_switch_mdb_add(struct dsa_switch *ds,
@@ -114,7 +137,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
114 const struct switchdev_obj_port_mdb *mdb = info->mdb; 137 const struct switchdev_obj_port_mdb *mdb = info->mdb;
115 struct switchdev_trans *trans = info->trans; 138 struct switchdev_trans *trans = info->trans;
116 DECLARE_BITMAP(group, ds->num_ports); 139 DECLARE_BITMAP(group, ds->num_ports);
117 int port, err; 140 int port;
118 141
119 /* Build a mask of Multicast group members */ 142 /* Build a mask of Multicast group members */
120 bitmap_zero(group, ds->num_ports); 143 bitmap_zero(group, ds->num_ports);
@@ -124,21 +147,10 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
124 if (dsa_is_dsa_port(ds, port)) 147 if (dsa_is_dsa_port(ds, port))
125 set_bit(port, group); 148 set_bit(port, group);
126 149
127 if (switchdev_trans_ph_prepare(trans)) { 150 if (switchdev_trans_ph_prepare(trans))
128 if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) 151 return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
129 return -EOPNOTSUPP;
130
131 for_each_set_bit(port, group, ds->num_ports) {
132 err = ds->ops->port_mdb_prepare(ds, port, mdb, trans);
133 if (err)
134 return err;
135 }
136
137 return 0;
138 }
139 152
140 for_each_set_bit(port, group, ds->num_ports) 153 dsa_switch_mdb_add_bitmap(ds, mdb, group);
141 ds->ops->port_mdb_add(ds, port, mdb, trans);
142 154
143 return 0; 155 return 0;
144} 156}
@@ -157,13 +169,43 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds,
157 return 0; 169 return 0;
158} 170}
159 171
172static int
173dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
174 const struct switchdev_obj_port_vlan *vlan,
175 const unsigned long *bitmap)
176{
177 int port, err;
178
179 if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
180 return -EOPNOTSUPP;
181
182 for_each_set_bit(port, bitmap, ds->num_ports) {
183 err = ds->ops->port_vlan_prepare(ds, port, vlan);
184 if (err)
185 return err;
186 }
187
188 return 0;
189}
190
191static void
192dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
193 const struct switchdev_obj_port_vlan *vlan,
194 const unsigned long *bitmap)
195{
196 int port;
197
198 for_each_set_bit(port, bitmap, ds->num_ports)
199 ds->ops->port_vlan_add(ds, port, vlan);
200}
201
160static int dsa_switch_vlan_add(struct dsa_switch *ds, 202static int dsa_switch_vlan_add(struct dsa_switch *ds,
161 struct dsa_notifier_vlan_info *info) 203 struct dsa_notifier_vlan_info *info)
162{ 204{
163 const struct switchdev_obj_port_vlan *vlan = info->vlan; 205 const struct switchdev_obj_port_vlan *vlan = info->vlan;
164 struct switchdev_trans *trans = info->trans; 206 struct switchdev_trans *trans = info->trans;
165 DECLARE_BITMAP(members, ds->num_ports); 207 DECLARE_BITMAP(members, ds->num_ports);
166 int port, err; 208 int port;
167 209
168 /* Build a mask of VLAN members */ 210 /* Build a mask of VLAN members */
169 bitmap_zero(members, ds->num_ports); 211 bitmap_zero(members, ds->num_ports);
@@ -173,21 +215,10 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
173 if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) 215 if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
174 set_bit(port, members); 216 set_bit(port, members);
175 217
176 if (switchdev_trans_ph_prepare(trans)) { 218 if (switchdev_trans_ph_prepare(trans))
177 if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) 219 return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
178 return -EOPNOTSUPP;
179
180 for_each_set_bit(port, members, ds->num_ports) {
181 err = ds->ops->port_vlan_prepare(ds, port, vlan, trans);
182 if (err)
183 return err;
184 }
185
186 return 0;
187 }
188 220
189 for_each_set_bit(port, members, ds->num_ports) 221 dsa_switch_vlan_add_bitmap(ds, vlan, members);
190 ds->ops->port_vlan_add(ds, port, vlan, trans);
191 222
192 return 0; 223 return 0;
193} 224}
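
The switch.c refactor pulls switchdev's two-phase object model into helpers: a prepare pass that may fail on any port in the member bitmap, then a commit pass that must not fail. Condensed from the VLAN hunk above:

            if (switchdev_trans_ph_prepare(trans))
                    /* phase 1: validate every member port; may return an error */
                    return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);

            /* phase 2: commit unconditionally */
            dsa_switch_vlan_add_bitmap(ds, vlan, members);
            return 0;
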
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e6e0b7b6025c..2b06bb91318b 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -70,6 +70,18 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
70 if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) 70 if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
71 return NULL; 71 return NULL;
72 72
73 /* The Ethernet switch we are interfaced with needs packets to be at
74 * least 64 bytes (including FCS) otherwise they will be discarded when
75 * they enter the switch port logic. When Broadcom tags are enabled, we
76 * need to make sure that packets are at least 68 bytes
77 * (including FCS and tag) because the length verification is done after
78 * the Broadcom tag is stripped off the ingress packet.
79 *
80 * Let dsa_slave_xmit() free the SKB
81 */
82 if (__skb_put_padto(skb, ETH_ZLEN + BRCM_TAG_LEN, false))
83 return NULL;
84
73 skb_push(skb, BRCM_TAG_LEN); 85 skb_push(skb, BRCM_TAG_LEN);
74 86
75 if (offset) 87 if (offset)
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 8475434af7d5..11535bc70743 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,10 +13,13 @@
13 */ 13 */
14 14
15#include <linux/etherdevice.h> 15#include <linux/etherdevice.h>
16#include <linux/if_vlan.h>
16 17
17#include "dsa_priv.h" 18#include "dsa_priv.h"
18 19
19#define MTK_HDR_LEN 4 20#define MTK_HDR_LEN 4
21#define MTK_HDR_XMIT_UNTAGGED 0
22#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
20#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) 23#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
21#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) 24#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
22 25
@@ -25,20 +28,37 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
25{ 28{
26 struct dsa_port *dp = dsa_slave_to_port(dev); 29 struct dsa_port *dp = dsa_slave_to_port(dev);
27 u8 *mtk_tag; 30 u8 *mtk_tag;
31 bool is_vlan_skb = true;
28 32
29 if (skb_cow_head(skb, MTK_HDR_LEN) < 0) 33 /* Build the special tag after the MAC Source Address. If a VLAN
30 return NULL; 34 * header is present, the VLAN header and the special tag must be
31 35 * combined so that the switch can parse both the special tag and
32 skb_push(skb, MTK_HDR_LEN); 36 * the VLAN tag at the same time and then look up the VLAN table
37 * with the VID.
38 */
39 if (!skb_vlan_tagged(skb)) {
40 if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
41 return NULL;
33 42
34 memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); 43 skb_push(skb, MTK_HDR_LEN);
44 memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
45 is_vlan_skb = false;
46 }
35 47
36 /* Build the tag after the MAC Source Address */
37 mtk_tag = skb->data + 2 * ETH_ALEN; 48 mtk_tag = skb->data + 2 * ETH_ALEN;
38 mtk_tag[0] = 0; 49
50 /* Set the tag attribute on special tag insertion to tell the hardware
51 * whether this is a special tag combined with an 802.1Q header.
52 */
53 mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
54 MTK_HDR_XMIT_UNTAGGED;
39 mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; 55 mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
40 mtk_tag[2] = 0; 56
41 mtk_tag[3] = 0; 57 /* Tag control information is kept for 802.1Q */
58 if (!is_vlan_skb) {
59 mtk_tag[2] = 0;
60 mtk_tag[3] = 0;
61 }
42 62
43 return skb; 63 return skb;
44} 64}
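
For illustration, a standalone sketch of the tag layout mtk_tag_xmit() now produces; the constants are copied from the hunk, while the fill helper and the sample TCI bytes are ours:

#include <stdio.h>

#define MTK_HDR_XMIT_UNTAGGED         0
#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
#define MTK_HDR_XMIT_DP_BIT_MASK      0x3f /* GENMASK(5, 0) */

/* Fill a 4-byte MTK tag the way mtk_tag_xmit() does: byte 0 flags a
 * combined 802.1Q header, byte 1 carries the destination port bitmap,
 * and for VLAN frames bytes 2-3 keep the original tag control info.
 */
static void mtk_fill_tag(unsigned char tag[4], int port, int is_vlan)
{
        tag[0] = is_vlan ? MTK_HDR_XMIT_TAGGED_TPID_8100
                         : MTK_HDR_XMIT_UNTAGGED;
        tag[1] = (1u << port) & MTK_HDR_XMIT_DP_BIT_MASK;
        if (!is_vlan) {
                tag[2] = 0;
                tag[3] = 0;
        }
}

int main(void)
{
        unsigned char tag[4] = { 0, 0, 0x00, 0x2a }; /* pretend TCI 0x002a */

        mtk_fill_tag(tag, 3, 1);
        printf("%02x %02x %02x %02x\n", tag[0], tag[1], tag[2], tag[3]);
        return 0;
}
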
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6c8ad1d4b6d..47a0a6649a9d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -43,7 +43,6 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o
43obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o 43obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
44obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o 44obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
45obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o 45obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
46obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
47obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o 46obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
48obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 47obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
49obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o 48obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f00499a46927..e4329e161943 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -121,6 +121,7 @@
121#endif 121#endif
122#include <net/l3mdev.h> 122#include <net/l3mdev.h>
123 123
124#include <trace/events/sock.h>
124 125
125/* The inetsw table contains everything that inet_create needs to 126/* The inetsw table contains everything that inet_create needs to
126 * build a new socket. 127 * build a new socket.
@@ -789,7 +790,8 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
789 int addr_len = 0; 790 int addr_len = 0;
790 int err; 791 int err;
791 792
792 sock_rps_record_flow(sk); 793 if (likely(!(flags & MSG_ERRQUEUE)))
794 sock_rps_record_flow(sk);
793 795
794 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, 796 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
795 flags & ~MSG_DONTWAIT, &addr_len); 797 flags & ~MSG_DONTWAIT, &addr_len);
@@ -826,7 +828,7 @@ int inet_shutdown(struct socket *sock, int how)
826 case TCP_CLOSE: 828 case TCP_CLOSE:
827 err = -ENOTCONN; 829 err = -ENOTCONN;
828 /* Hack to wake up other listeners, who can poll for 830 /* Hack to wake up other listeners, who can poll for
829 POLLHUP, even on eg. unconnected UDP sockets -- RR */ 831 EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
830 /* fall through */ 832 /* fall through */
831 default: 833 default:
832 sk->sk_shutdown |= how; 834 sk->sk_shutdown |= how;
@@ -870,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
870 struct sock *sk = sock->sk; 872 struct sock *sk = sock->sk;
871 int err = 0; 873 int err = 0;
872 struct net *net = sock_net(sk); 874 struct net *net = sock_net(sk);
875 void __user *p = (void __user *)arg;
876 struct ifreq ifr;
877 struct rtentry rt;
873 878
874 switch (cmd) { 879 switch (cmd) {
875 case SIOCGSTAMP: 880 case SIOCGSTAMP:
@@ -880,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
880 break; 885 break;
881 case SIOCADDRT: 886 case SIOCADDRT:
882 case SIOCDELRT: 887 case SIOCDELRT:
888 if (copy_from_user(&rt, p, sizeof(struct rtentry)))
889 return -EFAULT;
890 err = ip_rt_ioctl(net, cmd, &rt);
891 break;
883 case SIOCRTMSG: 892 case SIOCRTMSG:
884 err = ip_rt_ioctl(net, cmd, (void __user *)arg); 893 err = -EINVAL;
885 break; 894 break;
886 case SIOCDARP: 895 case SIOCDARP:
887 case SIOCGARP: 896 case SIOCGARP:
@@ -889,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
889 err = arp_ioctl(net, cmd, (void __user *)arg); 898 err = arp_ioctl(net, cmd, (void __user *)arg);
890 break; 899 break;
891 case SIOCGIFADDR: 900 case SIOCGIFADDR:
892 case SIOCSIFADDR:
893 case SIOCGIFBRDADDR: 901 case SIOCGIFBRDADDR:
894 case SIOCSIFBRDADDR:
895 case SIOCGIFNETMASK: 902 case SIOCGIFNETMASK:
896 case SIOCSIFNETMASK:
897 case SIOCGIFDSTADDR: 903 case SIOCGIFDSTADDR:
904 case SIOCGIFPFLAGS:
905 if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
906 return -EFAULT;
907 err = devinet_ioctl(net, cmd, &ifr);
908 if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
909 err = -EFAULT;
910 break;
911
912 case SIOCSIFADDR:
913 case SIOCSIFBRDADDR:
914 case SIOCSIFNETMASK:
898 case SIOCSIFDSTADDR: 915 case SIOCSIFDSTADDR:
899 case SIOCSIFPFLAGS: 916 case SIOCSIFPFLAGS:
900 case SIOCGIFPFLAGS:
901 case SIOCSIFFLAGS: 917 case SIOCSIFFLAGS:
902 err = devinet_ioctl(net, cmd, (void __user *)arg); 918 if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
919 return -EFAULT;
920 err = devinet_ioctl(net, cmd, &ifr);
903 break; 921 break;
904 default: 922 default:
905 if (sk->sk_prot->ioctl) 923 if (sk->sk_prot->ioctl)
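
The user-visible contract is unchanged: callers still pass a struct ifreq and read the result back. A minimal userspace sketch of the SIOCGIFADDR round trip this path now services with a kernel-side copy; the interface name is an example:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return 1;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1);

        /* The kernel now copies this ifreq in inet_ioctl(), calls
         * devinet_ioctl() on the kernel copy, and copies the result back.
         */
        if (ioctl(fd, SIOCGIFADDR, &ifr) == 0) {
                struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;

                printf("%s: %s\n", ifr.ifr_name, inet_ntoa(sin->sin_addr));
        }
        close(fd);
        return 0;
}
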
@@ -1220,6 +1238,19 @@ int inet_sk_rebuild_header(struct sock *sk)
1220} 1238}
1221EXPORT_SYMBOL(inet_sk_rebuild_header); 1239EXPORT_SYMBOL(inet_sk_rebuild_header);
1222 1240
1241void inet_sk_set_state(struct sock *sk, int state)
1242{
1243 trace_inet_sock_set_state(sk, sk->sk_state, state);
1244 sk->sk_state = state;
1245}
1246EXPORT_SYMBOL(inet_sk_set_state);
1247
1248void inet_sk_state_store(struct sock *sk, int newstate)
1249{
1250 trace_inet_sock_set_state(sk, sk->sk_state, newstate);
1251 smp_store_release(&sk->sk_state, newstate);
1252}
1253
1223struct sk_buff *inet_gso_segment(struct sk_buff *skb, 1254struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1224 netdev_features_t features) 1255 netdev_features_t features)
1225{ 1256{
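
inet_sk_state_store() pairs a release store with the acquire load on the reader side (inet_sk_state_load() in the kernel), on top of firing the tracepoint, so fields written before the state change are visible to lockless readers that observe it. A userspace analogue of the pairing with C11 atomics; the TCP_LISTEN value is illustrative:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int sk_state;

/* Analogue of inet_sk_state_store(): publish with release semantics. */
static void state_store(int newstate)
{
        atomic_store_explicit(&sk_state, newstate, memory_order_release);
}

/* Analogue of inet_sk_state_load(): read with acquire semantics. */
static int state_load(void)
{
        return atomic_load_explicit(&sk_state, memory_order_acquire);
}

int main(void)
{
        state_store(10); /* TCP_LISTEN */
        printf("state = %d\n", state_load());
        return 0;
}
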
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 6c231b43974d..f28f06c91ead 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1425,7 +1425,6 @@ static int arp_seq_open(struct inode *inode, struct file *file)
1425} 1425}
1426 1426
1427static const struct file_operations arp_seq_fops = { 1427static const struct file_operations arp_seq_fops = {
1428 .owner = THIS_MODULE,
1429 .open = arp_seq_open, 1428 .open = arp_seq_open,
1430 .read = seq_read, 1429 .read = seq_read,
1431 .llseek = seq_lseek, 1430 .llseek = seq_lseek,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7a93359fbc72..40f001782c1b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr)
946} 946}
947 947
948 948
949int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 949int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
950{ 950{
951 struct ifreq ifr;
952 struct sockaddr_in sin_orig; 951 struct sockaddr_in sin_orig;
953 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 952 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
954 struct in_device *in_dev; 953 struct in_device *in_dev;
955 struct in_ifaddr **ifap = NULL; 954 struct in_ifaddr **ifap = NULL;
956 struct in_ifaddr *ifa = NULL; 955 struct in_ifaddr *ifa = NULL;
@@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
959 int ret = -EFAULT; 958 int ret = -EFAULT;
960 int tryaddrmatch = 0; 959 int tryaddrmatch = 0;
961 960
962 /* 961 ifr->ifr_name[IFNAMSIZ - 1] = 0;
963 * Fetch the caller's info block into kernel space
964 */
965
966 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
967 goto out;
968 ifr.ifr_name[IFNAMSIZ - 1] = 0;
969 962
970 /* save original address for comparison */ 963 /* save original address for comparison */
971 memcpy(&sin_orig, sin, sizeof(*sin)); 964 memcpy(&sin_orig, sin, sizeof(*sin));
972 965
973 colon = strchr(ifr.ifr_name, ':'); 966 colon = strchr(ifr->ifr_name, ':');
974 if (colon) 967 if (colon)
975 *colon = 0; 968 *colon = 0;
976 969
977 dev_load(net, ifr.ifr_name); 970 dev_load(net, ifr->ifr_name);
978 971
979 switch (cmd) { 972 switch (cmd) {
980 case SIOCGIFADDR: /* Get interface address */ 973 case SIOCGIFADDR: /* Get interface address */
@@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1014 rtnl_lock(); 1007 rtnl_lock();
1015 1008
1016 ret = -ENODEV; 1009 ret = -ENODEV;
1017 dev = __dev_get_by_name(net, ifr.ifr_name); 1010 dev = __dev_get_by_name(net, ifr->ifr_name);
1018 if (!dev) 1011 if (!dev)
1019 goto done; 1012 goto done;
1020 1013
@@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1031 This is checked above. */ 1024 This is checked above. */
1032 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1025 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1033 ifap = &ifa->ifa_next) { 1026 ifap = &ifa->ifa_next) {
1034 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 1027 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1035 sin_orig.sin_addr.s_addr == 1028 sin_orig.sin_addr.s_addr ==
1036 ifa->ifa_local) { 1029 ifa->ifa_local) {
1037 break; /* found */ 1030 break; /* found */
@@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1044 if (!ifa) { 1037 if (!ifa) {
1045 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1038 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1046 ifap = &ifa->ifa_next) 1039 ifap = &ifa->ifa_next)
1047 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 1040 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1048 break; 1041 break;
1049 } 1042 }
1050 } 1043 }
@@ -1055,20 +1048,24 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1055 1048
1056 switch (cmd) { 1049 switch (cmd) {
1057 case SIOCGIFADDR: /* Get interface address */ 1050 case SIOCGIFADDR: /* Get interface address */
1051 ret = 0;
1058 sin->sin_addr.s_addr = ifa->ifa_local; 1052 sin->sin_addr.s_addr = ifa->ifa_local;
1059 goto rarok; 1053 break;
1060 1054
1061 case SIOCGIFBRDADDR: /* Get the broadcast address */ 1055 case SIOCGIFBRDADDR: /* Get the broadcast address */
1056 ret = 0;
1062 sin->sin_addr.s_addr = ifa->ifa_broadcast; 1057 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1063 goto rarok; 1058 break;
1064 1059
1065 case SIOCGIFDSTADDR: /* Get the destination address */ 1060 case SIOCGIFDSTADDR: /* Get the destination address */
1061 ret = 0;
1066 sin->sin_addr.s_addr = ifa->ifa_address; 1062 sin->sin_addr.s_addr = ifa->ifa_address;
1067 goto rarok; 1063 break;
1068 1064
1069 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 1065 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1066 ret = 0;
1070 sin->sin_addr.s_addr = ifa->ifa_mask; 1067 sin->sin_addr.s_addr = ifa->ifa_mask;
1071 goto rarok; 1068 break;
1072 1069
1073 case SIOCSIFFLAGS: 1070 case SIOCSIFFLAGS:
1074 if (colon) { 1071 if (colon) {
@@ -1076,11 +1073,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1076 if (!ifa) 1073 if (!ifa)
1077 break; 1074 break;
1078 ret = 0; 1075 ret = 0;
1079 if (!(ifr.ifr_flags & IFF_UP)) 1076 if (!(ifr->ifr_flags & IFF_UP))
1080 inet_del_ifa(in_dev, ifap, 1); 1077 inet_del_ifa(in_dev, ifap, 1);
1081 break; 1078 break;
1082 } 1079 }
1083 ret = dev_change_flags(dev, ifr.ifr_flags); 1080 ret = dev_change_flags(dev, ifr->ifr_flags);
1084 break; 1081 break;
1085 1082
1086 case SIOCSIFADDR: /* Set interface address (and family) */ 1083 case SIOCSIFADDR: /* Set interface address (and family) */
@@ -1095,7 +1092,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1095 break; 1092 break;
1096 INIT_HLIST_NODE(&ifa->hash); 1093 INIT_HLIST_NODE(&ifa->hash);
1097 if (colon) 1094 if (colon)
1098 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 1095 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1099 else 1096 else
1100 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1097 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1101 } else { 1098 } else {
@@ -1182,28 +1179,27 @@ done:
1182 rtnl_unlock(); 1179 rtnl_unlock();
1183out: 1180out:
1184 return ret; 1181 return ret;
1185rarok:
1186 rtnl_unlock();
1187 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1188 goto out;
1189} 1182}
1190 1183
1191static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 1184static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1192{ 1185{
1193 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1186 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1194 struct in_ifaddr *ifa; 1187 struct in_ifaddr *ifa;
1195 struct ifreq ifr; 1188 struct ifreq ifr;
1196 int done = 0; 1189 int done = 0;
1197 1190
1191 if (WARN_ON(size > sizeof(struct ifreq)))
1192 goto out;
1193
1198 if (!in_dev) 1194 if (!in_dev)
1199 goto out; 1195 goto out;
1200 1196
1201 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1197 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1202 if (!buf) { 1198 if (!buf) {
1203 done += sizeof(ifr); 1199 done += size;
1204 continue; 1200 continue;
1205 } 1201 }
1206 if (len < (int) sizeof(ifr)) 1202 if (len < size)
1207 break; 1203 break;
1208 memset(&ifr, 0, sizeof(struct ifreq)); 1204 memset(&ifr, 0, sizeof(struct ifreq));
1209 strcpy(ifr.ifr_name, ifa->ifa_label); 1205 strcpy(ifr.ifr_name, ifa->ifa_label);
@@ -1212,13 +1208,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1212 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 1208 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1213 ifa->ifa_local; 1209 ifa->ifa_local;
1214 1210
1215 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 1211 if (copy_to_user(buf + done, &ifr, size)) {
1216 done = -EFAULT; 1212 done = -EFAULT;
1217 break; 1213 break;
1218 } 1214 }
1219 buf += sizeof(struct ifreq); 1215 len -= size;
1220 len -= sizeof(struct ifreq); 1216 done += size;
1221 done += sizeof(struct ifreq);
1222 } 1217 }
1223out: 1218out:
1224 return done; 1219 return done;
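
The reworked loop packs entries at a caller-supplied stride no larger than sizeof(struct ifreq), with done doubling as the output offset; that is what lets a compat caller request a smaller per-entry layout. A rough userspace model of the copy loop, with buffer sizes and names of our own choosing:

#include <stdio.h>
#include <string.h>

/* Model of inet_gifconf(): a NULL buffer probes the space needed,
 * otherwise entries of "size" bytes are packed back to back.
 */
static int pack_entries(char *buf, int len, int size, int nentries)
{
        char entry[64];
        int i, done = 0;

        for (i = 0; i < nentries; i++) {
                if (!buf) {
                        done += size; /* probe mode: report space only */
                        continue;
                }
                if (len < size)
                        break;
                memset(entry, 0, sizeof(entry));
                snprintf(entry, sizeof(entry), "if%d", i);
                memcpy(buf + done, entry, size);
                len -= size;
                done += size;
        }
        return done;
}

int main(void)
{
        char buf[256];

        printf("needed: %d copied: %d\n",
               pack_entries(NULL, 0, 40, 4),
               pack_entries(buf, sizeof(buf), 40, 4));
        return 0;
}
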
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 61fe6e4d23fc..296d0b956bfe 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -121,14 +121,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
121static void esp_output_done(struct crypto_async_request *base, int err) 121static void esp_output_done(struct crypto_async_request *base, int err)
122{ 122{
123 struct sk_buff *skb = base->data; 123 struct sk_buff *skb = base->data;
124 struct xfrm_offload *xo = xfrm_offload(skb);
124 void *tmp; 125 void *tmp;
125 struct dst_entry *dst = skb_dst(skb); 126 struct xfrm_state *x;
126 struct xfrm_state *x = dst->xfrm; 127
128 if (xo && (xo->flags & XFRM_DEV_RESUME))
129 x = skb->sp->xvec[skb->sp->len - 1];
130 else
131 x = skb_dst(skb)->xfrm;
127 132
128 tmp = ESP_SKB_CB(skb)->tmp; 133 tmp = ESP_SKB_CB(skb)->tmp;
129 esp_ssg_unref(x, tmp); 134 esp_ssg_unref(x, tmp);
130 kfree(tmp); 135 kfree(tmp);
131 xfrm_output_resume(skb, err); 136
137 if (xo && (xo->flags & XFRM_DEV_RESUME)) {
138 if (err) {
139 XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
140 kfree_skb(skb);
141 return;
142 }
143
144 skb_push(skb, skb->data - skb_mac_header(skb));
145 secpath_reset(skb);
146 xfrm_dev_resume(skb);
147 } else {
148 xfrm_output_resume(skb, err);
149 }
132} 150}
133 151
134/* Move ESP header back into place. */ 152/* Move ESP header back into place. */
@@ -825,17 +843,13 @@ static int esp_init_aead(struct xfrm_state *x)
825 char aead_name[CRYPTO_MAX_ALG_NAME]; 843 char aead_name[CRYPTO_MAX_ALG_NAME];
826 struct crypto_aead *aead; 844 struct crypto_aead *aead;
827 int err; 845 int err;
828 u32 mask = 0;
829 846
830 err = -ENAMETOOLONG; 847 err = -ENAMETOOLONG;
831 if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", 848 if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
832 x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) 849 x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
833 goto error; 850 goto error;
834 851
835 if (x->xso.offload_handle) 852 aead = crypto_alloc_aead(aead_name, 0, 0);
836 mask |= CRYPTO_ALG_ASYNC;
837
838 aead = crypto_alloc_aead(aead_name, 0, mask);
839 err = PTR_ERR(aead); 853 err = PTR_ERR(aead);
840 if (IS_ERR(aead)) 854 if (IS_ERR(aead))
841 goto error; 855 goto error;
@@ -865,7 +879,6 @@ static int esp_init_authenc(struct xfrm_state *x)
865 char authenc_name[CRYPTO_MAX_ALG_NAME]; 879 char authenc_name[CRYPTO_MAX_ALG_NAME];
866 unsigned int keylen; 880 unsigned int keylen;
867 int err; 881 int err;
868 u32 mask = 0;
869 882
870 err = -EINVAL; 883 err = -EINVAL;
871 if (!x->ealg) 884 if (!x->ealg)
@@ -891,10 +904,7 @@ static int esp_init_authenc(struct xfrm_state *x)
891 goto error; 904 goto error;
892 } 905 }
893 906
894 if (x->xso.offload_handle) 907 aead = crypto_alloc_aead(authenc_name, 0, 0);
895 mask |= CRYPTO_ALG_ASYNC;
896
897 aead = crypto_alloc_aead(authenc_name, 0, mask);
898 err = PTR_ERR(aead); 908 err = PTR_ERR(aead);
899 if (IS_ERR(aead)) 909 if (IS_ERR(aead))
900 goto error; 910 goto error;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 29b333a62ab0..da5635fc52c2 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -109,78 +109,39 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
109static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, 109static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
110 netdev_features_t features) 110 netdev_features_t features)
111{ 111{
112 __u32 seq;
113 int err = 0;
114 struct sk_buff *skb2;
115 struct xfrm_state *x; 112 struct xfrm_state *x;
116 struct ip_esp_hdr *esph; 113 struct ip_esp_hdr *esph;
117 struct crypto_aead *aead; 114 struct crypto_aead *aead;
118 struct sk_buff *segs = ERR_PTR(-EINVAL);
119 netdev_features_t esp_features = features; 115 netdev_features_t esp_features = features;
120 struct xfrm_offload *xo = xfrm_offload(skb); 116 struct xfrm_offload *xo = xfrm_offload(skb);
121 117
122 if (!xo) 118 if (!xo)
123 goto out; 119 return ERR_PTR(-EINVAL);
124 120
125 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) 121 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
126 goto out; 122 return ERR_PTR(-EINVAL);
127
128 seq = xo->seq.low;
129 123
130 x = skb->sp->xvec[skb->sp->len - 1]; 124 x = skb->sp->xvec[skb->sp->len - 1];
131 aead = x->data; 125 aead = x->data;
132 esph = ip_esp_hdr(skb); 126 esph = ip_esp_hdr(skb);
133 127
134 if (esph->spi != x->id.spi) 128 if (esph->spi != x->id.spi)
135 goto out; 129 return ERR_PTR(-EINVAL);
136 130
137 if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) 131 if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
138 goto out; 132 return ERR_PTR(-EINVAL);
139 133
140 __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); 134 __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
141 135
142 skb->encap_hdr_csum = 1; 136 skb->encap_hdr_csum = 1;
143 137
144 if (!(features & NETIF_F_HW_ESP)) 138 if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
139 (x->xso.dev != skb->dev))
145 esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); 140 esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
146 141
147 segs = x->outer_mode->gso_segment(x, skb, esp_features); 142 xo->flags |= XFRM_GSO_SEGMENT;
148 if (IS_ERR_OR_NULL(segs))
149 goto out;
150
151 __skb_pull(skb, skb->data - skb_mac_header(skb));
152
153 skb2 = segs;
154 do {
155 struct sk_buff *nskb = skb2->next;
156
157 xo = xfrm_offload(skb2);
158 xo->flags |= XFRM_GSO_SEGMENT;
159 xo->seq.low = seq;
160 xo->seq.hi = xfrm_replay_seqhi(x, seq);
161
162 if(!(features & NETIF_F_HW_ESP))
163 xo->flags |= CRYPTO_FALLBACK;
164
165 x->outer_mode->xmit(x, skb2);
166 143
167 err = x->type_offload->xmit(x, skb2, esp_features); 144 return x->outer_mode->gso_segment(x, skb, esp_features);
168 if (err) {
169 kfree_skb_list(segs);
170 return ERR_PTR(err);
171 }
172
173 if (!skb_is_gso(skb2))
174 seq++;
175 else
176 seq += skb_shinfo(skb2)->gso_segs;
177
178 skb_push(skb2, skb2->mac_len);
179 skb2 = nskb;
180 } while (skb2);
181
182out:
183 return segs;
184} 145}
185 146
186static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) 147static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -207,6 +168,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
207 struct crypto_aead *aead; 168 struct crypto_aead *aead;
208 struct esp_info esp; 169 struct esp_info esp;
209 bool hw_offload = true; 170 bool hw_offload = true;
171 __u32 seq;
210 172
211 esp.inplace = true; 173 esp.inplace = true;
212 174
@@ -245,23 +207,30 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
245 return esp.nfrags; 207 return esp.nfrags;
246 } 208 }
247 209
210 seq = xo->seq.low;
211
248 esph = esp.esph; 212 esph = esp.esph;
249 esph->spi = x->id.spi; 213 esph->spi = x->id.spi;
250 214
251 skb_push(skb, -skb_network_offset(skb)); 215 skb_push(skb, -skb_network_offset(skb));
252 216
253 if (xo->flags & XFRM_GSO_SEGMENT) { 217 if (xo->flags & XFRM_GSO_SEGMENT) {
254 esph->seq_no = htonl(xo->seq.low); 218 esph->seq_no = htonl(seq);
255 } else { 219
256 ip_hdr(skb)->tot_len = htons(skb->len); 220 if (!skb_is_gso(skb))
257 ip_send_check(ip_hdr(skb)); 221 xo->seq.low++;
222 else
223 xo->seq.low += skb_shinfo(skb)->gso_segs;
258 } 224 }
259 225
226 esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
227
228 ip_hdr(skb)->tot_len = htons(skb->len);
229 ip_send_check(ip_hdr(skb));
230
260 if (hw_offload) 231 if (hw_offload)
261 return 0; 232 return 0;
262 233
263 esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
264
265 err = esp_output_tail(x, skb, &esp); 234 err = esp_output_tail(x, skb, &esp);
266 if (err) 235 if (err)
267 return err; 236 return err;
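
esp_xmit() now reads the 64-bit ESN once from xo->seq and advances the low half by the number of GSO segments the skb represents. A small sketch of that bookkeeping; the values are examples, and the kernel maintains the high half through its replay state rather than in this path:

#include <stdint.h>
#include <stdio.h>

/* Compose the 64-bit ESP sequence number the way esp_xmit() does:
 * seqno = lo + (hi << 32).
 */
static uint64_t esp_seqno(uint32_t lo, uint32_t hi)
{
        return (uint64_t)lo + ((uint64_t)hi << 32);
}

int main(void)
{
        uint32_t lo = 1000, hi = 2;
        unsigned int gso_segs = 3;

        printf("seqno   = %llu\n", (unsigned long long)esp_seqno(lo, hi));
        lo += gso_segs; /* xo->seq.low += skb_shinfo(skb)->gso_segs */
        printf("next lo = %u\n", lo);
        return 0;
}
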
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 08259d078b1c..f05afaf3235c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
587 * Handle IP routing ioctl calls. 587 * Handle IP routing ioctl calls.
588 * These are used to manipulate the routing tables 588 * These are used to manipulate the routing tables
589 */ 589 */
590int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 590int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
591{ 591{
592 struct fib_config cfg; 592 struct fib_config cfg;
593 struct rtentry rt;
594 int err; 593 int err;
595 594
596 switch (cmd) { 595 switch (cmd) {
@@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
599 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 598 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
600 return -EPERM; 599 return -EPERM;
601 600
602 if (copy_from_user(&rt, arg, sizeof(rt)))
603 return -EFAULT;
604
605 rtnl_lock(); 601 rtnl_lock();
606 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 602 err = rtentry_to_fib_config(net, cmd, rt, &cfg);
607 if (err == 0) { 603 if (err == 0) {
608 struct fib_table *tb; 604 struct fib_table *tb;
609 605
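
From userspace nothing changes: SIOCADDRT still takes a struct rtentry, the copy has simply moved up into inet_ioctl(). A hedged example of the caller side; the TEST-NET destination and the lo device are placeholders, and the call needs CAP_NET_ADMIN:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/route.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
        struct rtentry rt;
        struct sockaddr_in *sin;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return 1;

        memset(&rt, 0, sizeof(rt));
        sin = (struct sockaddr_in *)&rt.rt_dst;
        sin->sin_family = AF_INET;
        sin->sin_addr.s_addr = inet_addr("192.0.2.1");
        rt.rt_flags = RTF_UP | RTF_HOST;
        rt.rt_dev = "lo";

        /* The kernel now does copy_from_user() in inet_ioctl() and hands
         * ip_rt_ioctl() a kernel-space rtentry.
         */
        if (ioctl(fd, SIOCADDRT, &rt) < 0)
                perror("SIOCADDRT");
        close(fd);
        return 0;
}
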
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5ddc4aefff12..5530cd6fdbc7 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2334,7 +2334,6 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2334} 2334}
2335 2335
2336static const struct file_operations fib_triestat_fops = { 2336static const struct file_operations fib_triestat_fops = {
2337 .owner = THIS_MODULE,
2338 .open = fib_triestat_seq_open, 2337 .open = fib_triestat_seq_open,
2339 .read = seq_read, 2338 .read = seq_read,
2340 .llseek = seq_lseek, 2339 .llseek = seq_lseek,
@@ -2521,7 +2520,6 @@ static int fib_trie_seq_open(struct inode *inode, struct file *file)
2521} 2520}
2522 2521
2523static const struct file_operations fib_trie_fops = { 2522static const struct file_operations fib_trie_fops = {
2524 .owner = THIS_MODULE,
2525 .open = fib_trie_seq_open, 2523 .open = fib_trie_seq_open,
2526 .read = seq_read, 2524 .read = seq_read,
2527 .llseek = seq_lseek, 2525 .llseek = seq_lseek,
@@ -2715,7 +2713,6 @@ static int fib_route_seq_open(struct inode *inode, struct file *file)
2715} 2713}
2716 2714
2717static const struct file_operations fib_route_fops = { 2715static const struct file_operations fib_route_fops = {
2718 .owner = THIS_MODULE,
2719 .open = fib_route_seq_open, 2716 .open = fib_route_seq_open,
2720 .read = seq_read, 2717 .read = seq_read,
2721 .llseek = seq_lseek, 2718 .llseek = seq_lseek,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2d49717a7421..f2402581fef1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
386 pip->frag_off = htons(IP_DF); 386 pip->frag_off = htons(IP_DF);
387 pip->ttl = 1; 387 pip->ttl = 1;
388 pip->daddr = fl4.daddr; 388 pip->daddr = fl4.daddr;
389
390 rcu_read_lock();
389 pip->saddr = igmpv3_get_srcaddr(dev, &fl4); 391 pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
392 rcu_read_unlock();
393
390 pip->protocol = IPPROTO_IGMP; 394 pip->protocol = IPPROTO_IGMP;
391 pip->tot_len = 0; /* filled in later */ 395 pip->tot_len = 0; /* filled in later */
392 ip_select_ident(net, skb, NULL); 396 ip_select_ident(net, skb, NULL);
@@ -2832,7 +2836,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
2832} 2836}
2833 2837
2834static const struct file_operations igmp_mc_seq_fops = { 2838static const struct file_operations igmp_mc_seq_fops = {
2835 .owner = THIS_MODULE,
2836 .open = igmp_mc_seq_open, 2839 .open = igmp_mc_seq_open,
2837 .read = seq_read, 2840 .read = seq_read,
2838 .llseek = seq_lseek, 2841 .llseek = seq_lseek,
@@ -2979,7 +2982,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
2979} 2982}
2980 2983
2981static const struct file_operations igmp_mcf_seq_fops = { 2984static const struct file_operations igmp_mcf_seq_fops = {
2982 .owner = THIS_MODULE,
2983 .open = igmp_mcf_seq_open, 2985 .open = igmp_mcf_seq_open,
2984 .read = seq_read, 2986 .read = seq_read,
2985 .llseek = seq_lseek, 2987 .llseek = seq_lseek,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc08e63..881ac6d046f2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
475 } 475 }
476 spin_unlock_bh(&queue->fastopenq.lock); 476 spin_unlock_bh(&queue->fastopenq.lock);
477 } 477 }
478 mem_cgroup_sk_alloc(newsk);
479out: 478out:
480 release_sock(sk); 479 release_sock(sk);
481 if (req) 480 if (req)
@@ -685,7 +684,7 @@ static void reqsk_timer_handler(struct timer_list *t)
685 int max_retries, thresh; 684 int max_retries, thresh;
686 u8 defer_accept; 685 u8 defer_accept;
687 686
688 if (sk_state_load(sk_listener) != TCP_LISTEN) 687 if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
689 goto drop; 688 goto drop;
690 689
691 max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; 690 max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
@@ -783,7 +782,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
783 if (newsk) { 782 if (newsk) {
784 struct inet_connection_sock *newicsk = inet_csk(newsk); 783 struct inet_connection_sock *newicsk = inet_csk(newsk);
785 784
786 newsk->sk_state = TCP_SYN_RECV; 785 inet_sk_set_state(newsk, TCP_SYN_RECV);
787 newicsk->icsk_bind_hash = NULL; 786 newicsk->icsk_bind_hash = NULL;
788 787
789 inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; 788 inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -877,7 +876,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
877 * It is OK, because this socket enters to hash table only 876 * It is OK, because this socket enters to hash table only
878 * after validation is complete. 877 * after validation is complete.
879 */ 878 */
880 sk_state_store(sk, TCP_LISTEN); 879 inet_sk_state_store(sk, TCP_LISTEN);
881 if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 880 if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
882 inet->inet_sport = htons(inet->inet_num); 881 inet->inet_sport = htons(inet->inet_num);
883 882
@@ -888,7 +887,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
888 return 0; 887 return 0;
889 } 888 }
890 889
891 sk->sk_state = TCP_CLOSE; 890 inet_sk_set_state(sk, TCP_CLOSE);
892 return err; 891 return err;
893} 892}
894EXPORT_SYMBOL_GPL(inet_csk_listen_start); 893EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c9c35b61a027..a383f299ce24 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -564,12 +564,18 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
564 case INET_DIAG_BC_JMP: 564 case INET_DIAG_BC_JMP:
565 yes = 0; 565 yes = 0;
566 break; 566 break;
567 case INET_DIAG_BC_S_EQ:
568 yes = entry->sport == op[1].no;
569 break;
567 case INET_DIAG_BC_S_GE: 570 case INET_DIAG_BC_S_GE:
568 yes = entry->sport >= op[1].no; 571 yes = entry->sport >= op[1].no;
569 break; 572 break;
570 case INET_DIAG_BC_S_LE: 573 case INET_DIAG_BC_S_LE:
571 yes = entry->sport <= op[1].no; 574 yes = entry->sport <= op[1].no;
572 break; 575 break;
576 case INET_DIAG_BC_D_EQ:
577 yes = entry->dport == op[1].no;
578 break;
573 case INET_DIAG_BC_D_GE: 579 case INET_DIAG_BC_D_GE:
574 yes = entry->dport >= op[1].no; 580 yes = entry->dport >= op[1].no;
575 break; 581 break;
@@ -802,8 +808,10 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
802 if (!valid_devcond(bc, len, &min_len)) 808 if (!valid_devcond(bc, len, &min_len))
803 return -EINVAL; 809 return -EINVAL;
804 break; 810 break;
811 case INET_DIAG_BC_S_EQ:
805 case INET_DIAG_BC_S_GE: 812 case INET_DIAG_BC_S_GE:
806 case INET_DIAG_BC_S_LE: 813 case INET_DIAG_BC_S_LE:
814 case INET_DIAG_BC_D_EQ:
807 case INET_DIAG_BC_D_GE: 815 case INET_DIAG_BC_D_GE:
808 case INET_DIAG_BC_D_LE: 816 case INET_DIAG_BC_D_LE:
809 if (!valid_port_comparison(bc, len, &min_len)) 817 if (!valid_port_comparison(bc, len, &min_len))
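
The new ops give ss-style filters an exact port match instead of having to combine GE and LE. A toy model of the comparison step in inet_diag_bc_run(); op names are shortened, and the real bytecode walks nlattr-encoded operations:

#include <stdio.h>

enum { BC_S_EQ, BC_S_GE, BC_S_LE };

static int port_op_matches(int op, unsigned short sport, unsigned short arg)
{
        switch (op) {
        case BC_S_EQ: return sport == arg; /* the op added above */
        case BC_S_GE: return sport >= arg;
        case BC_S_LE: return sport <= arg;
        }
        return 0;
}

int main(void)
{
        printf("eq=%d ge=%d le=%d\n",
               port_op_matches(BC_S_EQ, 443, 443),
               port_op_matches(BC_S_GE, 443, 1024),
               port_op_matches(BC_S_LE, 443, 1024));
        return 0;
}
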
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7d15fb0d94d..31ff46daae97 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -19,6 +19,7 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/wait.h> 20#include <linux/wait.h>
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/bootmem.h>
22 23
23#include <net/addrconf.h> 24#include <net/addrconf.h>
24#include <net/inet_connection_sock.h> 25#include <net/inet_connection_sock.h>
@@ -168,6 +169,60 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
168} 169}
169EXPORT_SYMBOL_GPL(__inet_inherit_port); 170EXPORT_SYMBOL_GPL(__inet_inherit_port);
170 171
172static struct inet_listen_hashbucket *
173inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
174{
175 u32 hash;
176
177#if IS_ENABLED(CONFIG_IPV6)
178 if (sk->sk_family == AF_INET6)
179 hash = ipv6_portaddr_hash(sock_net(sk),
180 &sk->sk_v6_rcv_saddr,
181 inet_sk(sk)->inet_num);
182 else
183#endif
184 hash = ipv4_portaddr_hash(sock_net(sk),
185 inet_sk(sk)->inet_rcv_saddr,
186 inet_sk(sk)->inet_num);
187 return inet_lhash2_bucket(h, hash);
188}
189
190static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
191{
192 struct inet_listen_hashbucket *ilb2;
193
194 if (!h->lhash2)
195 return;
196
197 ilb2 = inet_lhash2_bucket_sk(h, sk);
198
199 spin_lock(&ilb2->lock);
200 if (sk->sk_reuseport && sk->sk_family == AF_INET6)
201 hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
202 &ilb2->head);
203 else
204 hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
205 &ilb2->head);
206 ilb2->count++;
207 spin_unlock(&ilb2->lock);
208}
209
210static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
211{
212 struct inet_listen_hashbucket *ilb2;
213
214 if (!h->lhash2 ||
215 WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
216 return;
217
218 ilb2 = inet_lhash2_bucket_sk(h, sk);
219
220 spin_lock(&ilb2->lock);
221 hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
222 ilb2->count--;
223 spin_unlock(&ilb2->lock);
224}
225
171static inline int compute_score(struct sock *sk, struct net *net, 226static inline int compute_score(struct sock *sk, struct net *net,
172 const unsigned short hnum, const __be32 daddr, 227 const unsigned short hnum, const __be32 daddr,
173 const int dif, const int sdif, bool exact_dif) 228 const int dif, const int sdif, bool exact_dif)
@@ -207,6 +262,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
207 */ 262 */
208 263
209/* called with rcu_read_lock() : No refcount taken on the socket */ 264/* called with rcu_read_lock() : No refcount taken on the socket */
265static struct sock *inet_lhash2_lookup(struct net *net,
266 struct inet_listen_hashbucket *ilb2,
267 struct sk_buff *skb, int doff,
268 const __be32 saddr, __be16 sport,
269 const __be32 daddr, const unsigned short hnum,
270 const int dif, const int sdif)
271{
272 bool exact_dif = inet_exact_dif_match(net, skb);
273 struct inet_connection_sock *icsk;
274 struct sock *sk, *result = NULL;
275 int score, hiscore = 0;
276 u32 phash = 0;
277
278 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
279 sk = (struct sock *)icsk;
280 score = compute_score(sk, net, hnum, daddr,
281 dif, sdif, exact_dif);
282 if (score > hiscore) {
283 if (sk->sk_reuseport) {
284 phash = inet_ehashfn(net, daddr, hnum,
285 saddr, sport);
286 result = reuseport_select_sock(sk, phash,
287 skb, doff);
288 if (result)
289 return result;
290 }
291 result = sk;
292 hiscore = score;
293 }
294 }
295
296 return result;
297}
298
210struct sock *__inet_lookup_listener(struct net *net, 299struct sock *__inet_lookup_listener(struct net *net,
211 struct inet_hashinfo *hashinfo, 300 struct inet_hashinfo *hashinfo,
212 struct sk_buff *skb, int doff, 301 struct sk_buff *skb, int doff,
@@ -216,32 +305,57 @@ struct sock *__inet_lookup_listener(struct net *net,
216{ 305{
217 unsigned int hash = inet_lhashfn(net, hnum); 306 unsigned int hash = inet_lhashfn(net, hnum);
218 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 307 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
219 int score, hiscore = 0, matches = 0, reuseport = 0;
220 bool exact_dif = inet_exact_dif_match(net, skb); 308 bool exact_dif = inet_exact_dif_match(net, skb);
309 struct inet_listen_hashbucket *ilb2;
221 struct sock *sk, *result = NULL; 310 struct sock *sk, *result = NULL;
311 int score, hiscore = 0;
312 unsigned int hash2;
222 u32 phash = 0; 313 u32 phash = 0;
223 314
315 if (ilb->count <= 10 || !hashinfo->lhash2)
316 goto port_lookup;
317
318 /* Too many sk in the ilb bucket (which is hashed by port alone).
319 * Try lhash2 (which is hashed by port and addr) instead.
320 */
321
322 hash2 = ipv4_portaddr_hash(net, daddr, hnum);
323 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
324 if (ilb2->count > ilb->count)
325 goto port_lookup;
326
327 result = inet_lhash2_lookup(net, ilb2, skb, doff,
328 saddr, sport, daddr, hnum,
329 dif, sdif);
330 if (result)
331 return result;
332
333 /* Lookup lhash2 with INADDR_ANY */
334
335 hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
336 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
337 if (ilb2->count > ilb->count)
338 goto port_lookup;
339
340 return inet_lhash2_lookup(net, ilb2, skb, doff,
341 saddr, sport, daddr, hnum,
342 dif, sdif);
343
344port_lookup:
224 sk_for_each_rcu(sk, &ilb->head) { 345 sk_for_each_rcu(sk, &ilb->head) {
225 score = compute_score(sk, net, hnum, daddr, 346 score = compute_score(sk, net, hnum, daddr,
226 dif, sdif, exact_dif); 347 dif, sdif, exact_dif);
227 if (score > hiscore) { 348 if (score > hiscore) {
228 reuseport = sk->sk_reuseport; 349 if (sk->sk_reuseport) {
229 if (reuseport) {
230 phash = inet_ehashfn(net, daddr, hnum, 350 phash = inet_ehashfn(net, daddr, hnum,
231 saddr, sport); 351 saddr, sport);
232 result = reuseport_select_sock(sk, phash, 352 result = reuseport_select_sock(sk, phash,
233 skb, doff); 353 skb, doff);
234 if (result) 354 if (result)
235 return result; 355 return result;
236 matches = 1;
237 } 356 }
238 result = sk; 357 result = sk;
239 hiscore = score; 358 hiscore = score;
240 } else if (score == hiscore && reuseport) {
241 matches++;
242 if (reciprocal_scale(phash, matches) == 0)
243 result = sk;
244 phash = next_pseudo_random32(phash);
245 } 359 }
246 } 360 }
247 return result; 361 return result;
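
The gain comes from the second hash including the address: thousands of listeners sharing one port all land in a single port-only bucket, but spread across (addr, port) buckets, and the lookup walks whichever chain is shorter. A toy demonstration; the hash functions are stand-ins, not the kernel's jhash-based ones:

#include <stdint.h>
#include <stdio.h>

#define NBUCKETS 256

static unsigned int hash_port(uint16_t port)
{
        return port % NBUCKETS;
}

static unsigned int hash_addr_port(uint32_t addr, uint16_t port)
{
        return (addr * 2654435761u + port) % NBUCKETS;
}

int main(void)
{
        static unsigned int by_port[NBUCKETS], by_addr_port[NBUCKETS];
        uint16_t port = 80;
        uint32_t addr;

        /* 10000 listeners share port 80 on distinct local addresses */
        for (addr = 1; addr <= 10000; addr++) {
                by_port[hash_port(port)]++;
                by_addr_port[hash_addr_port(addr, port)]++;
        }
        printf("port-only bucket: %u sockets\n", by_port[hash_port(port)]);
        printf("(addr, port) bucket for addr 42: %u sockets\n",
               by_addr_port[hash_addr_port(42, port)]);
        return 0;
}
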
@@ -430,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
430 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 544 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
431 } else { 545 } else {
432 percpu_counter_inc(sk->sk_prot->orphan_count); 546 percpu_counter_inc(sk->sk_prot->orphan_count);
433 sk->sk_state = TCP_CLOSE; 547 inet_sk_set_state(sk, TCP_CLOSE);
434 sock_set_flag(sk, SOCK_DEAD); 548 sock_set_flag(sk, SOCK_DEAD);
435 inet_csk_destroy_sock(sk); 549 inet_csk_destroy_sock(sk);
436 } 550 }
@@ -483,6 +597,8 @@ int __inet_hash(struct sock *sk, struct sock *osk)
483 hlist_add_tail_rcu(&sk->sk_node, &ilb->head); 597 hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
484 else 598 else
485 hlist_add_head_rcu(&sk->sk_node, &ilb->head); 599 hlist_add_head_rcu(&sk->sk_node, &ilb->head);
600 inet_hash2(hashinfo, sk);
601 ilb->count++;
486 sock_set_flag(sk, SOCK_RCU_FREE); 602 sock_set_flag(sk, SOCK_RCU_FREE);
487 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 603 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
488unlock: 604unlock:
@@ -509,28 +625,33 @@ EXPORT_SYMBOL_GPL(inet_hash);
509void inet_unhash(struct sock *sk) 625void inet_unhash(struct sock *sk)
510{ 626{
511 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 627 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
628 struct inet_listen_hashbucket *ilb = NULL;
512 spinlock_t *lock; 629 spinlock_t *lock;
513 bool listener = false;
514 int done;
515 630
516 if (sk_unhashed(sk)) 631 if (sk_unhashed(sk))
517 return; 632 return;
518 633
519 if (sk->sk_state == TCP_LISTEN) { 634 if (sk->sk_state == TCP_LISTEN) {
520 lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; 635 ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
521 listener = true; 636 lock = &ilb->lock;
522 } else { 637 } else {
523 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 638 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
524 } 639 }
525 spin_lock_bh(lock); 640 spin_lock_bh(lock);
641 if (sk_unhashed(sk))
642 goto unlock;
643
526 if (rcu_access_pointer(sk->sk_reuseport_cb)) 644 if (rcu_access_pointer(sk->sk_reuseport_cb))
527 reuseport_detach_sock(sk); 645 reuseport_detach_sock(sk);
528 if (listener) 646 if (ilb) {
529 done = __sk_del_node_init(sk); 647 inet_unhash2(hashinfo, sk);
530 else 648 __sk_del_node_init(sk);
531 done = __sk_nulls_del_node_init_rcu(sk); 649 ilb->count--;
532 if (done) 650 } else {
533 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 651 __sk_nulls_del_node_init_rcu(sk);
652 }
653 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
654unlock:
534 spin_unlock_bh(lock); 655 spin_unlock_bh(lock);
535} 656}
536EXPORT_SYMBOL_GPL(inet_unhash); 657EXPORT_SYMBOL_GPL(inet_unhash);
@@ -665,10 +786,37 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
665 for (i = 0; i < INET_LHTABLE_SIZE; i++) { 786 for (i = 0; i < INET_LHTABLE_SIZE; i++) {
666 spin_lock_init(&h->listening_hash[i].lock); 787 spin_lock_init(&h->listening_hash[i].lock);
667 INIT_HLIST_HEAD(&h->listening_hash[i].head); 788 INIT_HLIST_HEAD(&h->listening_hash[i].head);
789 h->listening_hash[i].count = 0;
668 } 790 }
791
792 h->lhash2 = NULL;
669} 793}
670EXPORT_SYMBOL_GPL(inet_hashinfo_init); 794EXPORT_SYMBOL_GPL(inet_hashinfo_init);
671 795
796void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
797 unsigned long numentries, int scale,
798 unsigned long low_limit,
799 unsigned long high_limit)
800{
801 unsigned int i;
802
803 h->lhash2 = alloc_large_system_hash(name,
804 sizeof(*h->lhash2),
805 numentries,
806 scale,
807 0,
808 NULL,
809 &h->lhash2_mask,
810 low_limit,
811 high_limit);
812
813 for (i = 0; i <= h->lhash2_mask; i++) {
814 spin_lock_init(&h->lhash2[i].lock);
815 INIT_HLIST_HEAD(&h->lhash2[i].head);
816 h->lhash2[i].count = 0;
817 }
818}
819
672int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) 820int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
673{ 821{
674 unsigned int locksz = sizeof(spinlock_t); 822 unsigned int locksz = sizeof(spinlock_t);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index b563e0c46bac..c3ea4906d237 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -97,7 +97,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
97 * Essentially we whip up a timewait bucket, copy the relevant info into it 97 * Essentially we whip up a timewait bucket, copy the relevant info into it
98 * from the SK, and mess with hash chains and list linkage. 98 * from the SK, and mess with hash chains and list linkage.
99 */ 99 */
100void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, 100void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
101 struct inet_hashinfo *hashinfo) 101 struct inet_hashinfo *hashinfo)
102{ 102{
103 const struct inet_sock *inet = inet_sk(sk); 103 const struct inet_sock *inet = inet_sk(sk);
@@ -119,18 +119,6 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
119 119
120 spin_lock(lock); 120 spin_lock(lock);
121 121
122 /*
123 * Step 2: Hash TW into tcp ehash chain.
124 * Notes :
125 * - tw_refcnt is set to 4 because :
126 * - We have one reference from bhash chain.
127 * - We have one reference from ehash chain.
128 * - We have one reference from timer.
129 * - One reference for ourselves (our caller will release it).
130 * We can use atomic_set() because prior spin_lock()/spin_unlock()
131 * committed into memory all tw fields.
132 */
133 refcount_set(&tw->tw_refcnt, 4);
134 inet_twsk_add_node_rcu(tw, &ehead->chain); 122 inet_twsk_add_node_rcu(tw, &ehead->chain);
135 123
136 /* Step 3: Remove SK from hash chain */ 124 /* Step 3: Remove SK from hash chain */
@@ -138,8 +126,19 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
138 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 126 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
139 127
140 spin_unlock(lock); 128 spin_unlock(lock);
129
130 /* tw_refcnt is set to 3 because we have:
131 * - one reference for bhash chain.
132 * - one reference for ehash chain.
133 * - one reference for timer.
134 * We can use atomic_set() because prior spin_lock()/spin_unlock()
135 * committed all tw fields to memory.
136 * Also note that after this point, we have lost our implicit
137 * reference, so we are not allowed to use tw anymore.
138 */
139 refcount_set(&tw->tw_refcnt, 3);
141} 140}
142EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 141EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
143 142
144static void tw_timer_handler(struct timer_list *t) 143static void tw_timer_handler(struct timer_list *t)
145{ 144{
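
Moving the refcount_set() after the unlock is the point of the change: setting it is the publish step, and from then on any of the three reference holders may drop the last reference and free the socket, so the writer must not touch tw again. A userspace toy of that ownership rule with C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

struct tw { _Atomic int refcnt; };

static void tw_put(struct tw *tw)
{
        /* fetch_sub returns the old value; whoever saw 1 frees */
        if (atomic_fetch_sub(&tw->refcnt, 1) == 1)
                printf("last put frees the timewait socket\n");
}

int main(void)
{
        struct tw tw;

        atomic_store(&tw.refcnt, 3); /* publish: hands off after this */
        tw_put(&tw); /* bhash chain */
        tw_put(&tw); /* ehash chain */
        tw_put(&tw); /* timer */
        return 0;
}
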
@@ -271,14 +270,14 @@ restart:
271 continue; 270 continue;
272 tw = inet_twsk(sk); 271 tw = inet_twsk(sk);
273 if ((tw->tw_family != family) || 272 if ((tw->tw_family != family) ||
274 atomic_read(&twsk_net(tw)->count)) 273 refcount_read(&twsk_net(tw)->count))
275 continue; 274 continue;
276 275
277 if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) 276 if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
278 continue; 277 continue;
279 278
280 if (unlikely((tw->tw_family != family) || 279 if (unlikely((tw->tw_family != family) ||
281 atomic_read(&twsk_net(tw)->count))) { 280 refcount_read(&twsk_net(tw)->count))) {
282 inet_twsk_put(tw); 281 inet_twsk_put(tw);
283 goto restart; 282 goto restart;
284 } 283 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45ffd3d045d2..45d97e9b2759 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -114,7 +114,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
114static struct rtnl_link_ops ipgre_link_ops __read_mostly; 114static struct rtnl_link_ops ipgre_link_ops __read_mostly;
115static int ipgre_tunnel_init(struct net_device *dev); 115static int ipgre_tunnel_init(struct net_device *dev);
116static void erspan_build_header(struct sk_buff *skb, 116static void erspan_build_header(struct sk_buff *skb,
117 __be32 id, u32 index, bool truncate); 117 u32 id, u32 index,
118 bool truncate, bool is_ipv4);
118 119
119static unsigned int ipgre_net_id __read_mostly; 120static unsigned int ipgre_net_id __read_mostly;
120static unsigned int gre_tap_net_id __read_mostly; 121static unsigned int gre_tap_net_id __read_mostly;
@@ -255,34 +256,44 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
255{ 256{
256 struct net *net = dev_net(skb->dev); 257 struct net *net = dev_net(skb->dev);
257 struct metadata_dst *tun_dst = NULL; 258 struct metadata_dst *tun_dst = NULL;
259 struct erspan_base_hdr *ershdr;
260 struct erspan_metadata *pkt_md;
258 struct ip_tunnel_net *itn; 261 struct ip_tunnel_net *itn;
259 struct ip_tunnel *tunnel; 262 struct ip_tunnel *tunnel;
260 struct erspanhdr *ershdr;
261 const struct iphdr *iph; 263 const struct iphdr *iph;
262 __be32 index; 264 struct erspan_md2 *md2;
265 int ver;
263 int len; 266 int len;
264 267
265 itn = net_generic(net, erspan_net_id); 268 itn = net_generic(net, erspan_net_id);
266 len = gre_hdr_len + sizeof(*ershdr); 269 len = gre_hdr_len + sizeof(*ershdr);
267 270
271 /* Check base hdr len */
268 if (unlikely(!pskb_may_pull(skb, len))) 272 if (unlikely(!pskb_may_pull(skb, len)))
269 return PACKET_REJECT; 273 return PACKET_REJECT;
270 274
271 iph = ip_hdr(skb); 275 iph = ip_hdr(skb);
272 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); 276 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
277 ver = ershdr->ver;
273 278
274 /* The original GRE header does not have key field, 279 /* The original GRE header does not have key field,
275 * Use ERSPAN 10-bit session ID as key. 280 * Use ERSPAN 10-bit session ID as key.
276 */ 281 */
277 tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); 282 tpi->key = cpu_to_be32(get_session_id(ershdr));
278 index = ershdr->md.index;
279 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 283 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
280 tpi->flags | TUNNEL_KEY, 284 tpi->flags | TUNNEL_KEY,
281 iph->saddr, iph->daddr, tpi->key); 285 iph->saddr, iph->daddr, tpi->key);
282 286
283 if (tunnel) { 287 if (tunnel) {
288 len = gre_hdr_len + erspan_hdr_len(ver);
289 if (unlikely(!pskb_may_pull(skb, len)))
290 return PACKET_REJECT;
291
292 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
293 pkt_md = (struct erspan_metadata *)(ershdr + 1);
294
284 if (__iptunnel_pull_header(skb, 295 if (__iptunnel_pull_header(skb,
285 gre_hdr_len + sizeof(*ershdr), 296 len,
286 htons(ETH_P_TEB), 297 htons(ETH_P_TEB),
287 false, false) < 0) 298 false, false) < 0)
288 goto drop; 299 goto drop;
@@ -303,15 +314,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
303 return PACKET_REJECT; 314 return PACKET_REJECT;
304 315
305 md = ip_tunnel_info_opts(&tun_dst->u.tun_info); 316 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
306 if (!md) 317 md->version = ver;
307 return PACKET_REJECT; 318 md2 = &md->u.md2;
319 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
320 ERSPAN_V2_MDSIZE);
308 321
309 md->index = index;
310 info = &tun_dst->u.tun_info; 322 info = &tun_dst->u.tun_info;
311 info->key.tun_flags |= TUNNEL_ERSPAN_OPT; 323 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
312 info->options_len = sizeof(*md); 324 info->options_len = sizeof(*md);
313 } else {
314 tunnel->index = ntohl(index);
315 } 325 }
316 326
317 skb_reset_mac_header(skb); 327 skb_reset_mac_header(skb);
@@ -405,14 +415,17 @@ static int gre_rcv(struct sk_buff *skb)
405 if (hdr_len < 0) 415 if (hdr_len < 0)
406 goto drop; 416 goto drop;
407 417
408 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { 418 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
419 tpi.proto == htons(ETH_P_ERSPAN2))) {
409 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 420 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
410 return 0; 421 return 0;
422 goto out;
411 } 423 }
412 424
413 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 425 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
414 return 0; 426 return 0;
415 427
428out:
416 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 429 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
417drop: 430drop:
418 kfree_skb(skb); 431 kfree_skb(skb);
@@ -560,6 +573,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
560 bool truncate = false; 573 bool truncate = false;
561 struct flowi4 fl; 574 struct flowi4 fl;
562 int tunnel_hlen; 575 int tunnel_hlen;
576 int version;
563 __be16 df; 577 __be16 df;
564 578
565 tun_info = skb_tunnel_info(skb); 579 tun_info = skb_tunnel_info(skb);
@@ -568,9 +582,13 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
568 goto err_free_skb; 582 goto err_free_skb;
569 583
570 key = &tun_info->key; 584 key = &tun_info->key;
585 md = ip_tunnel_info_opts(tun_info);
586 if (!md)
587 goto err_free_rt;
571 588
572 /* ERSPAN has fixed 8 byte GRE header */ 589 /* ERSPAN has fixed 8 byte GRE header */
573 tunnel_hlen = 8 + sizeof(struct erspanhdr); 590 version = md->version;
591 tunnel_hlen = 8 + erspan_hdr_len(version);
574 592
575 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 593 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
576 if (!rt) 594 if (!rt)
@@ -584,12 +602,18 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
584 truncate = true; 602 truncate = true;
585 } 603 }
586 604
587 md = ip_tunnel_info_opts(tun_info); 605 if (version == 1) {
588 if (!md) 606 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
607 ntohl(md->u.index), truncate, true);
608 } else if (version == 2) {
609 erspan_build_header_v2(skb,
610 ntohl(tunnel_id_to_key32(key->tun_id)),
611 md->u.md2.dir,
612 get_hwid(&md->u.md2),
613 truncate, true);
614 } else {
589 goto err_free_rt; 615 goto err_free_rt;
590 616 }
591 erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
592 ntohl(md->index), truncate);
593 617
594 gre_build_header(skb, 8, TUNNEL_SEQ, 618 gre_build_header(skb, 8, TUNNEL_SEQ,
595 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); 619 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
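
The version-dependent sizing above mirrors erspan_hdr_len(): a fixed base header followed by 4 bytes of v1 metadata (the index) or 8 bytes of v2 metadata (dir, hwid and friends), on top of ERSPAN's fixed 8-byte GRE header. A sketch with the sizes as we read them from the headers:

#include <stdio.h>

#define GRE_HDR_LEN         8 /* ERSPAN always uses an 8-byte GRE header */
#define ERSPAN_BASE_HDR_LEN 4 /* sizeof(struct erspan_base_hdr) */
#define ERSPAN_V1_MDSIZE    4
#define ERSPAN_V2_MDSIZE    8

static int erspan_hdr_len(int version)
{
        return ERSPAN_BASE_HDR_LEN +
               (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
}

int main(void)
{
        printf("v1 tunnel_hlen = %d\n", GRE_HDR_LEN + erspan_hdr_len(1));
        printf("v2 tunnel_hlen = %d\n", GRE_HDR_LEN + erspan_hdr_len(2));
        return 0;
}
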
@@ -668,52 +692,6 @@ free_skb:
668 return NETDEV_TX_OK; 692 return NETDEV_TX_OK;
669} 693}
670 694
671static inline u8 tos_to_cos(u8 tos)
672{
673 u8 dscp, cos;
674
675 dscp = tos >> 2;
676 cos = dscp >> 3;
677 return cos;
678}
679
680static void erspan_build_header(struct sk_buff *skb,
681 __be32 id, u32 index, bool truncate)
682{
683 struct iphdr *iphdr = ip_hdr(skb);
684 struct ethhdr *eth = eth_hdr(skb);
685 enum erspan_encap_type enc_type;
686 struct erspanhdr *ershdr;
687 struct qtag_prefix {
688 __be16 eth_type;
689 __be16 tci;
690 } *qp;
691 u16 vlan_tci = 0;
692
693 enc_type = ERSPAN_ENCAP_NOVLAN;
694
695 /* If mirrored packet has vlan tag, extract tci and
696 * preserve vlan header in the mirrored frame.
697 */
698 if (eth->h_proto == htons(ETH_P_8021Q)) {
699 qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
700 vlan_tci = ntohs(qp->tci);
701 enc_type = ERSPAN_ENCAP_INFRAME;
702 }
703
704 skb_push(skb, sizeof(*ershdr));
705 ershdr = (struct erspanhdr *)skb->data;
706 memset(ershdr, 0, sizeof(*ershdr));
707
708 ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
709 (ERSPAN_VERSION << VER_OFFSET));
710 ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
711 ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
712 (enc_type << EN_OFFSET & EN_MASK) |
713 ((truncate << T_OFFSET) & T_MASK));
714 ershdr->md.index = htonl(index & INDEX_MASK);
715}
716
717static netdev_tx_t erspan_xmit(struct sk_buff *skb, 695static netdev_tx_t erspan_xmit(struct sk_buff *skb,
718 struct net_device *dev) 696 struct net_device *dev)
719{ 697{
@@ -737,7 +715,15 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
737 } 715 }
738 716
739 /* Push ERSPAN header */ 717 /* Push ERSPAN header */
740 erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate); 718 if (tunnel->erspan_ver == 1)
719 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
720 tunnel->index,
721 truncate, true);
722 else
723 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
724 tunnel->dir, tunnel->hwid,
725 truncate, true);
726
741 tunnel->parms.o_flags &= ~TUNNEL_KEY; 727 tunnel->parms.o_flags &= ~TUNNEL_KEY;
742 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); 728 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
743 return NETDEV_TX_OK; 729 return NETDEV_TX_OK;
@@ -1209,13 +1195,32 @@ static int ipgre_netlink_parms(struct net_device *dev,
1209 if (data[IFLA_GRE_FWMARK]) 1195 if (data[IFLA_GRE_FWMARK])
1210 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); 1196 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1211 1197
1212 if (data[IFLA_GRE_ERSPAN_INDEX]) { 1198 if (data[IFLA_GRE_ERSPAN_VER]) {
1213 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); 1199 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1214 1200
1215 if (t->index & ~INDEX_MASK) 1201 if (t->erspan_ver != 1 && t->erspan_ver != 2)
1216 return -EINVAL; 1202 return -EINVAL;
1217 } 1203 }
1218 1204
1205 if (t->erspan_ver == 1) {
1206 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1207 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1208 if (t->index & ~INDEX_MASK)
1209 return -EINVAL;
1210 }
1211 } else if (t->erspan_ver == 2) {
1212 if (data[IFLA_GRE_ERSPAN_DIR]) {
1213 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1214 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1215 return -EINVAL;
1216 }
1217 if (data[IFLA_GRE_ERSPAN_HWID]) {
1218 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1219 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1220 return -EINVAL;
1221 }
1222 }
1223
1219 return 0; 1224 return 0;
1220} 1225}
1221 1226
@@ -1282,7 +1287,7 @@ static int erspan_tunnel_init(struct net_device *dev)
1282 tunnel->tun_hlen = 8; 1287 tunnel->tun_hlen = 8;
1283 tunnel->parms.iph.protocol = IPPROTO_GRE; 1288 tunnel->parms.iph.protocol = IPPROTO_GRE;
1284 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + 1289 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1285 sizeof(struct erspanhdr); 1290 erspan_hdr_len(tunnel->erspan_ver);
1286 t_hlen = tunnel->hlen + sizeof(struct iphdr); 1291 t_hlen = tunnel->hlen + sizeof(struct iphdr);
1287 1292
1288 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; 1293 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
@@ -1413,6 +1418,12 @@ static size_t ipgre_get_size(const struct net_device *dev)
1413 nla_total_size(4) + 1418 nla_total_size(4) +
1414 /* IFLA_GRE_ERSPAN_INDEX */ 1419 /* IFLA_GRE_ERSPAN_INDEX */
1415 nla_total_size(4) + 1420 nla_total_size(4) +
1421 /* IFLA_GRE_ERSPAN_VER */
1422 nla_total_size(1) +
1423 /* IFLA_GRE_ERSPAN_DIR */
1424 nla_total_size(1) +
1425 /* IFLA_GRE_ERSPAN_HWID */
1426 nla_total_size(2) +
1416 0; 1427 0;
1417} 1428}
1418 1429
@@ -1455,9 +1466,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1455 goto nla_put_failure; 1466 goto nla_put_failure;
1456 } 1467 }
1457 1468
1458 if (t->index) 1469 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1470 goto nla_put_failure;
1471
1472 if (t->erspan_ver == 1) {
1459 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) 1473 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1460 goto nla_put_failure; 1474 goto nla_put_failure;
1475 } else if (t->erspan_ver == 2) {
1476 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1477 goto nla_put_failure;
1478 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1479 goto nla_put_failure;
1480 }
1461 1481
1462 return 0; 1482 return 0;
1463 1483
@@ -1493,6 +1513,9 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1493 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, 1513 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1494 [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, 1514 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
1495 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, 1515 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1516 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1517 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1518 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
1496}; 1519};
1497 1520
1498static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1521static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 60fb1eb7d7d8..008be04ac1cc 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
808 { 808 {
809 struct net_device *dev = NULL; 809 struct net_device *dev = NULL;
810 int ifindex; 810 int ifindex;
811 int midx;
811 812
812 if (optlen != sizeof(int)) 813 if (optlen != sizeof(int))
813 goto e_inval; 814 goto e_inval;
@@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
823 err = -EADDRNOTAVAIL; 824 err = -EADDRNOTAVAIL;
824 if (!dev) 825 if (!dev)
825 break; 826 break;
827
828 midx = l3mdev_master_ifindex(dev);
826 dev_put(dev); 829 dev_put(dev);
827 830
828 err = -EINVAL; 831 err = -EINVAL;
829 if (sk->sk_bound_dev_if) 832 if (sk->sk_bound_dev_if &&
833 (!midx || midx != sk->sk_bound_dev_if))
830 break; 834 break;
831 835
832 inet->uc_index = ifindex; 836 inet->uc_index = ifindex;
@@ -1251,11 +1255,8 @@ int ip_setsockopt(struct sock *sk, int level,
1251 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1255 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1252 optname != IP_IPSEC_POLICY && 1256 optname != IP_IPSEC_POLICY &&
1253 optname != IP_XFRM_POLICY && 1257 optname != IP_XFRM_POLICY &&
1254 !ip_mroute_opt(optname)) { 1258 !ip_mroute_opt(optname))
1255 lock_sock(sk);
1256 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); 1259 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1257 release_sock(sk);
1258 }
1259#endif 1260#endif
1260 return err; 1261 return err;
1261} 1262}
@@ -1280,12 +1281,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1280 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1281 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1281 optname != IP_IPSEC_POLICY && 1282 optname != IP_IPSEC_POLICY &&
1282 optname != IP_XFRM_POLICY && 1283 optname != IP_XFRM_POLICY &&
1283 !ip_mroute_opt(optname)) { 1284 !ip_mroute_opt(optname))
1284 lock_sock(sk); 1285 err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
1285 err = compat_nf_setsockopt(sk, PF_INET, optname, 1286 optlen);
1286 optval, optlen);
1287 release_sock(sk);
1288 }
1289#endif 1287#endif
1290 return err; 1288 return err;
1291} 1289}
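Annotation: two independent changes land in ip_sockglue.c. First, the IP_UNICAST_IF handler now tolerates sockets bound to an L3 master device (a VRF): instead of rejecting any bound socket outright, it accepts a device that is enslaved to the bound master. Second, this caller no longer wraps nf_setsockopt()/compat_nf_setsockopt() in lock_sock()/release_sock(); the locking moves into the code paths that actually need it. The relaxed bind check, condensed from the hunk:

    /* A bound socket may still set IP_UNICAST_IF when the chosen
     * device is enslaved to the bound L3 master (VRF) device. */
    midx = l3mdev_master_ifindex(dev);	/* 0 when dev has no master */
    if (sk->sk_bound_dev_if && (!midx || midx != sk->sk_bound_dev_if))
    	return -EINVAL;			/* bound elsewhere: keep rejecting */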
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6d21068f9b55..d786a8441bce 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -710,9 +710,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
710 } 710 }
711 } 711 }
712 712
713 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 713 if (tunnel->fwmark) {
714 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, 714 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
715 tunnel->fwmark); 715 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
716 tunnel->fwmark);
717 }
718 else {
719 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
720 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
721 skb->mark);
722 }
716 723
717 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 724 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
718 goto tx_error; 725 goto tx_error;
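Annotation: the committed ip_tunnel.c hunk duplicates the entire init_tunnel_flow() call in both branches even though only the final mark argument differs; the behavioural change is that the flow falls back to the per-packet skb->mark whenever no device-level fwmark is configured. An equivalent, more compact form (an editorial sketch, not what was merged):

    	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
    			 tunnel->parms.o_key, RT_TOS(tos),
    			 tunnel->parms.link,
    			 tunnel->fwmark ? tunnel->fwmark : skb->mark);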
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index abdebca848c9..f75802ad960f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
329 sin->sin_port = port; 329 sin->sin_port = port;
330} 330}
331 331
332static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg)
333{
334 int res;
335
336 mm_segment_t oldfs = get_fs();
337 set_fs(get_ds());
338 res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
339 set_fs(oldfs);
340 return res;
341}
342
343static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
344{
345 int res;
346
347 mm_segment_t oldfs = get_fs();
348 set_fs(get_ds());
349 res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
350 set_fs(oldfs);
351 return res;
352}
353
354static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
355{
356 int res;
357
358 mm_segment_t oldfs = get_fs();
359 set_fs(get_ds());
360 res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
361 set_fs(oldfs);
362 return res;
363}
364
365/* 332/*
366 * Set up interface addresses and routes. 333 * Set up interface addresses and routes.
367 */ 334 */
@@ -375,19 +342,19 @@ static int __init ic_setup_if(void)
375 memset(&ir, 0, sizeof(ir)); 342 memset(&ir, 0, sizeof(ir));
376 strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); 343 strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
377 set_sockaddr(sin, ic_myaddr, 0); 344 set_sockaddr(sin, ic_myaddr, 0);
378 if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { 345 if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) {
379 pr_err("IP-Config: Unable to set interface address (%d)\n", 346 pr_err("IP-Config: Unable to set interface address (%d)\n",
380 err); 347 err);
381 return -1; 348 return -1;
382 } 349 }
383 set_sockaddr(sin, ic_netmask, 0); 350 set_sockaddr(sin, ic_netmask, 0);
384 if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) { 351 if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) {
385 pr_err("IP-Config: Unable to set interface netmask (%d)\n", 352 pr_err("IP-Config: Unable to set interface netmask (%d)\n",
386 err); 353 err);
387 return -1; 354 return -1;
388 } 355 }
389 set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); 356 set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
390 if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { 357 if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) {
391 pr_err("IP-Config: Unable to set interface broadcast address (%d)\n", 358 pr_err("IP-Config: Unable to set interface broadcast address (%d)\n",
392 err); 359 err);
393 return -1; 360 return -1;
@@ -397,11 +364,11 @@ static int __init ic_setup_if(void)
397 * out, we'll try to muddle along. 364 * out, we'll try to muddle along.
398 */ 365 */
399 if (ic_dev_mtu != 0) { 366 if (ic_dev_mtu != 0) {
400 strcpy(ir.ifr_name, ic_dev->dev->name); 367 rtnl_lock();
401 ir.ifr_mtu = ic_dev_mtu; 368 if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0)
402 if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
403 pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", 369 pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
404 ic_dev_mtu, err); 370 ic_dev_mtu, err);
371 rtnl_unlock();
405 } 372 }
406 return 0; 373 return 0;
407} 374}
@@ -423,7 +390,7 @@ static int __init ic_setup_routes(void)
423 set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); 390 set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
424 set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); 391 set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
425 rm.rt_flags = RTF_UP | RTF_GATEWAY; 392 rm.rt_flags = RTF_UP | RTF_GATEWAY;
426 if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { 393 if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) {
427 pr_err("IP-Config: Cannot add default route (%d)\n", 394 pr_err("IP-Config: Cannot add default route (%d)\n",
428 err); 395 err);
429 return -1; 396 return -1;
@@ -1322,7 +1289,6 @@ static int pnp_seq_open(struct inode *indoe, struct file *file)
1322} 1289}
1323 1290
1324static const struct file_operations pnp_seq_fops = { 1291static const struct file_operations pnp_seq_fops = {
1325 .owner = THIS_MODULE,
1326 .open = pnp_seq_open, 1292 .open = pnp_seq_open,
1327 .read = seq_read, 1293 .read = seq_read,
1328 .llseek = seq_lseek, 1294 .llseek = seq_lseek,
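Annotation: the three deleted ic_*_ioctl() wrappers in ipconfig.c existed only to flip the address limit with set_fs(get_ds()) so that user-pointer APIs would accept kernel pointers; the patch instead calls the in-kernel, netns-aware entry points directly (devinet_ioctl(), ip_rt_ioctl(), and dev_set_mtu() under the RTNL lock). For reference, the general shape of the removed anti-pattern, with user_facing_ioctl() as a hypothetical stand-in:

    	mm_segment_t oldfs = get_fs();

    	set_fs(get_ds());	/* let copy_from_user() accept kernel pointers */
    	res = user_facing_ioctl(cmd, (void __user *)kernel_arg);	/* hypothetical */
    	set_fs(oldfs);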
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fd5f19c988e4..b05689bbba31 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3022,7 +3022,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
3022 const char *name = vif->dev ? vif->dev->name : "none"; 3022 const char *name = vif->dev ? vif->dev->name : "none";
3023 3023
3024 seq_printf(seq, 3024 seq_printf(seq,
3025 "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 3025 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
3026 vif - mrt->vif_table, 3026 vif - mrt->vif_table,
3027 name, vif->bytes_in, vif->pkt_in, 3027 name, vif->bytes_in, vif->pkt_in,
3028 vif->bytes_out, vif->pkt_out, 3028 vif->bytes_out, vif->pkt_out,
@@ -3045,7 +3045,6 @@ static int ipmr_vif_open(struct inode *inode, struct file *file)
3045} 3045}
3046 3046
3047static const struct file_operations ipmr_vif_fops = { 3047static const struct file_operations ipmr_vif_fops = {
3048 .owner = THIS_MODULE,
3049 .open = ipmr_vif_open, 3048 .open = ipmr_vif_open,
3050 .read = seq_read, 3049 .read = seq_read,
3051 .llseek = seq_lseek, 3050 .llseek = seq_lseek,
@@ -3198,7 +3197,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file)
3198} 3197}
3199 3198
3200static const struct file_operations ipmr_mfc_fops = { 3199static const struct file_operations ipmr_mfc_fops = {
3201 .owner = THIS_MODULE,
3202 .open = ipmr_mfc_open, 3200 .open = ipmr_mfc_open,
3203 .read = seq_read, 3201 .read = seq_read,
3204 .llseek = seq_lseek, 3202 .llseek = seq_lseek,
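Annotation: the ipmr.c format-string change swaps %zd for %td because vif - mrt->vif_table is a pointer subtraction, which has type ptrdiff_t; %zd is the length modifier for size_t/ssize_t and is the wrong type on some ABIs. A standalone illustration:

    #include <stdio.h>

    int main(void)
    {
    	int table[8];
    	int *elem = &table[5];

    	/* The subtraction yields ptrdiff_t, so %td matches exactly;
    	 * %zd would only be correct for a size_t/ssize_t value. */
    	printf("index = %td\n", elem - table);	/* prints "index = 5" */
    	return 0;
    }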
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c0cc6aa8cfaa..e6774ccb7731 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,35 +80,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
80} 80}
81EXPORT_SYMBOL(ip_route_me_harder); 81EXPORT_SYMBOL(ip_route_me_harder);
82 82
83/* 83int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
84 * Extra routing may needed on local out, as the QUEUE target never
85 * returns control to the table.
86 */
87
88struct ip_rt_info {
89 __be32 daddr;
90 __be32 saddr;
91 u_int8_t tos;
92 u_int32_t mark;
93};
94
95static void nf_ip_saveroute(const struct sk_buff *skb,
96 struct nf_queue_entry *entry)
97{
98 struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
99
100 if (entry->state.hook == NF_INET_LOCAL_OUT) {
101 const struct iphdr *iph = ip_hdr(skb);
102
103 rt_info->tos = iph->tos;
104 rt_info->daddr = iph->daddr;
105 rt_info->saddr = iph->saddr;
106 rt_info->mark = skb->mark;
107 }
108}
109
110static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
111 const struct nf_queue_entry *entry)
112{ 84{
113 const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); 85 const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
114 86
@@ -119,10 +91,12 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
119 skb->mark == rt_info->mark && 91 skb->mark == rt_info->mark &&
120 iph->daddr == rt_info->daddr && 92 iph->daddr == rt_info->daddr &&
121 iph->saddr == rt_info->saddr)) 93 iph->saddr == rt_info->saddr))
122 return ip_route_me_harder(net, skb, RTN_UNSPEC); 94 return ip_route_me_harder(entry->state.net, skb,
95 RTN_UNSPEC);
123 } 96 }
124 return 0; 97 return 0;
125} 98}
99EXPORT_SYMBOL_GPL(nf_ip_reroute);
126 100
127__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 101__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
128 unsigned int dataoff, u_int8_t protocol) 102 unsigned int dataoff, u_int8_t protocol)
@@ -155,9 +129,9 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
155} 129}
156EXPORT_SYMBOL(nf_ip_checksum); 130EXPORT_SYMBOL(nf_ip_checksum);
157 131
158static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 132__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
159 unsigned int dataoff, unsigned int len, 133 unsigned int dataoff, unsigned int len,
160 u_int8_t protocol) 134 u_int8_t protocol)
161{ 135{
162 const struct iphdr *iph = ip_hdr(skb); 136 const struct iphdr *iph = ip_hdr(skb);
163 __sum16 csum = 0; 137 __sum16 csum = 0;
@@ -175,9 +149,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
175 } 149 }
176 return csum; 150 return csum;
177} 151}
152EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
178 153
179static int nf_ip_route(struct net *net, struct dst_entry **dst, 154int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
180 struct flowi *fl, bool strict __always_unused) 155 bool strict __always_unused)
181{ 156{
182 struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); 157 struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
183 if (IS_ERR(rt)) 158 if (IS_ERR(rt))
@@ -185,19 +160,4 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst,
185 *dst = &rt->dst; 160 *dst = &rt->dst;
186 return 0; 161 return 0;
187} 162}
188 163EXPORT_SYMBOL_GPL(nf_ip_route);
189static const struct nf_afinfo nf_ip_afinfo = {
190 .family = AF_INET,
191 .checksum = nf_ip_checksum,
192 .checksum_partial = nf_ip_checksum_partial,
193 .route = nf_ip_route,
194 .saveroute = nf_ip_saveroute,
195 .reroute = nf_ip_reroute,
196 .route_key_size = sizeof(struct ip_rt_info),
197};
198
199static int __init ipv4_netfilter_init(void)
200{
201 return nf_register_afinfo(&nf_ip_afinfo);
202}
203subsys_initcall(ipv4_netfilter_init);
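Annotation: this rewrite of net/ipv4/netfilter.c is part of removing the struct nf_afinfo indirection. The checksum, route and reroute helpers become plain EXPORT_SYMBOL_GPL'd functions, the saveroute state handling moves elsewhere, and the per-family ops table (plus its registration initcall) disappears. A hedged before/after sketch of a call site, assuming the old nf_get_afinfo() lookup helper:

    	/* before: indirect call through the per-family ops table */
    	const struct nf_afinfo *afinfo = nf_get_afinfo(AF_INET);
    	csum = afinfo->checksum(skb, hook, dataoff, IPPROTO_TCP);

    	/* after: direct call into the exported IPv4 helper */
    	csum = nf_ip_checksum(skb, hook, dataoff, IPPROTO_TCP);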
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c11eb1744ab1..dfe6fa4ea554 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -72,11 +72,20 @@ endif # NF_TABLES_IPV4
72 72
73config NF_TABLES_ARP 73config NF_TABLES_ARP
74 tristate "ARP nf_tables support" 74 tristate "ARP nf_tables support"
75 select NETFILTER_FAMILY_ARP
75 help 76 help
76 This option enables the ARP support for nf_tables. 77 This option enables the ARP support for nf_tables.
77 78
78endif # NF_TABLES 79endif # NF_TABLES
79 80
81config NF_FLOW_TABLE_IPV4
82 tristate "Netfilter flow table IPv4 module"
83 depends on NF_FLOW_TABLE
84 help
85 This option adds the flow table IPv4 support.
86
87 To compile it as a module, choose M here.
88
80config NF_DUP_IPV4 89config NF_DUP_IPV4
81 tristate "Netfilter IPv4 packet duplication to alternate destination" 90 tristate "Netfilter IPv4 packet duplication to alternate destination"
82 depends on !NF_CONNTRACK || NF_CONNTRACK 91 depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -148,6 +157,7 @@ config NF_NAT_SNMP_BASIC
148 depends on NF_CONNTRACK_SNMP 157 depends on NF_CONNTRACK_SNMP
149 depends on NETFILTER_ADVANCED 158 depends on NETFILTER_ADVANCED
150 default NF_NAT && NF_CONNTRACK_SNMP 159 default NF_NAT && NF_CONNTRACK_SNMP
160 select ASN1
151 ---help--- 161 ---help---
152 162
153 This module implements an Application Layer Gateway (ALG) for 163 This module implements an Application Layer Gateway (ALG) for
@@ -333,6 +343,7 @@ config IP_NF_TARGET_CLUSTERIP
333 depends on NF_CONNTRACK_IPV4 343 depends on NF_CONNTRACK_IPV4
334 depends on NETFILTER_ADVANCED 344 depends on NETFILTER_ADVANCED
335 select NF_CONNTRACK_MARK 345 select NF_CONNTRACK_MARK
346 select NETFILTER_FAMILY_ARP
336 help 347 help
337 The CLUSTERIP target allows you to build load-balancing clusters of 348 The CLUSTERIP target allows you to build load-balancing clusters of
338 network servers without having a dedicated load-balancing 349 network servers without having a dedicated load-balancing
@@ -392,6 +403,7 @@ endif # IP_NF_IPTABLES
392config IP_NF_ARPTABLES 403config IP_NF_ARPTABLES
393 tristate "ARP tables support" 404 tristate "ARP tables support"
394 select NETFILTER_XTABLES 405 select NETFILTER_XTABLES
406 select NETFILTER_FAMILY_ARP
395 depends on NETFILTER_ADVANCED 407 depends on NETFILTER_ADVANCED
396 help 408 help
397 arptables is a general, extensible packet identification framework. 409 arptables is a general, extensible packet identification framework.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index adcdae358365..2dad20eefd26 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,9 +27,15 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
27# NAT helpers (nf_conntrack) 27# NAT helpers (nf_conntrack)
28obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o 28obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
29obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o 29obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
30
31nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o
32nf_nat_snmp_basic-y : nf_nat_snmp_basic-asn1.h nf_nat_snmp_basic-asn1.c
30obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o 33obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
34clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h
35
31obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o 36obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
32 37
38
33# NAT protocols (nf_nat) 39# NAT protocols (nf_nat)
34obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o 40obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
35 41
@@ -43,6 +49,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
43obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o 49obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
44obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o 50obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
45 51
52# flow table support
53obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
54
46# generic IP tables 55# generic IP tables
47obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o 56obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
48 57
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0c3c944a7b72..4ffe302f9b82 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -202,13 +202,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
202 202
203 local_bh_disable(); 203 local_bh_disable();
204 addend = xt_write_recseq_begin(); 204 addend = xt_write_recseq_begin();
205 private = table->private; 205 private = READ_ONCE(table->private); /* Address dependency. */
206 cpu = smp_processor_id(); 206 cpu = smp_processor_id();
207 /*
208 * Ensure we load private-> members after we've fetched the base
209 * pointer.
210 */
211 smp_read_barrier_depends();
212 table_base = private->entries; 207 table_base = private->entries;
213 jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; 208 jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
214 209
@@ -810,9 +805,8 @@ static int get_info(struct net *net, void __user *user,
810 if (compat) 805 if (compat)
811 xt_compat_lock(NFPROTO_ARP); 806 xt_compat_lock(NFPROTO_ARP);
812#endif 807#endif
813 t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), 808 t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
814 "arptable_%s", name); 809 if (!IS_ERR(t)) {
815 if (t) {
816 struct arpt_getinfo info; 810 struct arpt_getinfo info;
817 const struct xt_table_info *private = t->private; 811 const struct xt_table_info *private = t->private;
818#ifdef CONFIG_COMPAT 812#ifdef CONFIG_COMPAT
@@ -841,7 +835,7 @@ static int get_info(struct net *net, void __user *user,
841 xt_table_unlock(t); 835 xt_table_unlock(t);
842 module_put(t->me); 836 module_put(t->me);
843 } else 837 } else
844 ret = -ENOENT; 838 ret = PTR_ERR(t);
845#ifdef CONFIG_COMPAT 839#ifdef CONFIG_COMPAT
846 if (compat) 840 if (compat)
847 xt_compat_unlock(NFPROTO_ARP); 841 xt_compat_unlock(NFPROTO_ARP);
@@ -866,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
866 get.name[sizeof(get.name) - 1] = '\0'; 860 get.name[sizeof(get.name) - 1] = '\0';
867 861
868 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 862 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
869 if (t) { 863 if (!IS_ERR(t)) {
870 const struct xt_table_info *private = t->private; 864 const struct xt_table_info *private = t->private;
871 865
872 if (get.size == private->size) 866 if (get.size == private->size)
@@ -878,7 +872,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
878 module_put(t->me); 872 module_put(t->me);
879 xt_table_unlock(t); 873 xt_table_unlock(t);
880 } else 874 } else
881 ret = -ENOENT; 875 ret = PTR_ERR(t);
882 876
883 return ret; 877 return ret;
884} 878}
@@ -903,10 +897,9 @@ static int __do_replace(struct net *net, const char *name,
903 goto out; 897 goto out;
904 } 898 }
905 899
906 t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), 900 t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
907 "arptable_%s", name); 901 if (IS_ERR(t)) {
908 if (!t) { 902 ret = PTR_ERR(t);
909 ret = -ENOENT;
910 goto free_newinfo_counters_untrans; 903 goto free_newinfo_counters_untrans;
911 } 904 }
912 905
@@ -1020,8 +1013,8 @@ static int do_add_counters(struct net *net, const void __user *user,
1020 return PTR_ERR(paddc); 1013 return PTR_ERR(paddc);
1021 1014
1022 t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); 1015 t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
1023 if (!t) { 1016 if (IS_ERR(t)) {
1024 ret = -ENOENT; 1017 ret = PTR_ERR(t);
1025 goto free; 1018 goto free;
1026 } 1019 }
1027 1020
@@ -1408,7 +1401,7 @@ static int compat_get_entries(struct net *net,
1408 1401
1409 xt_compat_lock(NFPROTO_ARP); 1402 xt_compat_lock(NFPROTO_ARP);
1410 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 1403 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
1411 if (t) { 1404 if (!IS_ERR(t)) {
1412 const struct xt_table_info *private = t->private; 1405 const struct xt_table_info *private = t->private;
1413 struct xt_table_info info; 1406 struct xt_table_info info;
1414 1407
@@ -1423,7 +1416,7 @@ static int compat_get_entries(struct net *net,
1423 module_put(t->me); 1416 module_put(t->me);
1424 xt_table_unlock(t); 1417 xt_table_unlock(t);
1425 } else 1418 } else
1426 ret = -ENOENT; 1419 ret = PTR_ERR(t);
1427 1420
1428 xt_compat_unlock(NFPROTO_ARP); 1421 xt_compat_unlock(NFPROTO_ARP);
1429 return ret; 1422 return ret;
@@ -1658,7 +1651,6 @@ static int __init arp_tables_init(void)
1658 if (ret < 0) 1651 if (ret < 0)
1659 goto err4; 1652 goto err4;
1660 1653
1661 pr_info("arp_tables: (C) 2002 David S. Miller\n");
1662 return 0; 1654 return 0;
1663 1655
1664err4: 1656err4:
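Annotation: arp_tables switches the table lookup from NULL-or-pointer to ERR_PTR() returns, so every caller can propagate the precise errno instead of a blanket -ENOENT; the new xt_request_find_table_lock() also folds in the module auto-load that the open-coded try_then_request_module() used to provide. The resulting caller pattern, in miniature:

    	struct xt_table *t;

    	t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
    	if (IS_ERR(t))
    		return PTR_ERR(t);	/* -ENOENT, or whatever the lookup hit */
    	/* ... use t ... */
    	xt_table_unlock(t);
    	module_put(t->me);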
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2e0d339028bb..9a71f3149507 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -260,13 +260,8 @@ ipt_do_table(struct sk_buff *skb,
260 WARN_ON(!(table->valid_hooks & (1 << hook))); 260 WARN_ON(!(table->valid_hooks & (1 << hook)));
261 local_bh_disable(); 261 local_bh_disable();
262 addend = xt_write_recseq_begin(); 262 addend = xt_write_recseq_begin();
263 private = table->private; 263 private = READ_ONCE(table->private); /* Address dependency. */
264 cpu = smp_processor_id(); 264 cpu = smp_processor_id();
265 /*
266 * Ensure we load private-> members after we've fetched the base
267 * pointer.
268 */
269 smp_read_barrier_depends();
270 table_base = private->entries; 265 table_base = private->entries;
271 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; 266 jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
272 267
@@ -973,9 +968,8 @@ static int get_info(struct net *net, void __user *user,
973 if (compat) 968 if (compat)
974 xt_compat_lock(AF_INET); 969 xt_compat_lock(AF_INET);
975#endif 970#endif
976 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), 971 t = xt_request_find_table_lock(net, AF_INET, name);
977 "iptable_%s", name); 972 if (!IS_ERR(t)) {
978 if (t) {
979 struct ipt_getinfo info; 973 struct ipt_getinfo info;
980 const struct xt_table_info *private = t->private; 974 const struct xt_table_info *private = t->private;
981#ifdef CONFIG_COMPAT 975#ifdef CONFIG_COMPAT
@@ -1005,7 +999,7 @@ static int get_info(struct net *net, void __user *user,
1005 xt_table_unlock(t); 999 xt_table_unlock(t);
1006 module_put(t->me); 1000 module_put(t->me);
1007 } else 1001 } else
1008 ret = -ENOENT; 1002 ret = PTR_ERR(t);
1009#ifdef CONFIG_COMPAT 1003#ifdef CONFIG_COMPAT
1010 if (compat) 1004 if (compat)
1011 xt_compat_unlock(AF_INET); 1005 xt_compat_unlock(AF_INET);
@@ -1030,7 +1024,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1030 get.name[sizeof(get.name) - 1] = '\0'; 1024 get.name[sizeof(get.name) - 1] = '\0';
1031 1025
1032 t = xt_find_table_lock(net, AF_INET, get.name); 1026 t = xt_find_table_lock(net, AF_INET, get.name);
1033 if (t) { 1027 if (!IS_ERR(t)) {
1034 const struct xt_table_info *private = t->private; 1028 const struct xt_table_info *private = t->private;
1035 if (get.size == private->size) 1029 if (get.size == private->size)
1036 ret = copy_entries_to_user(private->size, 1030 ret = copy_entries_to_user(private->size,
@@ -1041,7 +1035,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1041 module_put(t->me); 1035 module_put(t->me);
1042 xt_table_unlock(t); 1036 xt_table_unlock(t);
1043 } else 1037 } else
1044 ret = -ENOENT; 1038 ret = PTR_ERR(t);
1045 1039
1046 return ret; 1040 return ret;
1047} 1041}
@@ -1064,10 +1058,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1064 goto out; 1058 goto out;
1065 } 1059 }
1066 1060
1067 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), 1061 t = xt_request_find_table_lock(net, AF_INET, name);
1068 "iptable_%s", name); 1062 if (IS_ERR(t)) {
1069 if (!t) { 1063 ret = PTR_ERR(t);
1070 ret = -ENOENT;
1071 goto free_newinfo_counters_untrans; 1064 goto free_newinfo_counters_untrans;
1072 } 1065 }
1073 1066
@@ -1181,8 +1174,8 @@ do_add_counters(struct net *net, const void __user *user,
1181 return PTR_ERR(paddc); 1174 return PTR_ERR(paddc);
1182 1175
1183 t = xt_find_table_lock(net, AF_INET, tmp.name); 1176 t = xt_find_table_lock(net, AF_INET, tmp.name);
1184 if (!t) { 1177 if (IS_ERR(t)) {
1185 ret = -ENOENT; 1178 ret = PTR_ERR(t);
1186 goto free; 1179 goto free;
1187 } 1180 }
1188 1181
@@ -1625,7 +1618,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1625 1618
1626 xt_compat_lock(AF_INET); 1619 xt_compat_lock(AF_INET);
1627 t = xt_find_table_lock(net, AF_INET, get.name); 1620 t = xt_find_table_lock(net, AF_INET, get.name);
1628 if (t) { 1621 if (!IS_ERR(t)) {
1629 const struct xt_table_info *private = t->private; 1622 const struct xt_table_info *private = t->private;
1630 struct xt_table_info info; 1623 struct xt_table_info info;
1631 ret = compat_table_info(private, &info); 1624 ret = compat_table_info(private, &info);
@@ -1639,7 +1632,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1639 module_put(t->me); 1632 module_put(t->me);
1640 xt_table_unlock(t); 1633 xt_table_unlock(t);
1641 } else 1634 } else
1642 ret = -ENOENT; 1635 ret = PTR_ERR(t);
1643 1636
1644 xt_compat_unlock(AF_INET); 1637 xt_compat_unlock(AF_INET);
1645 return ret; 1638 return ret;
@@ -1941,7 +1934,6 @@ static int __init ip_tables_init(void)
1941 if (ret < 0) 1934 if (ret < 0)
1942 goto err5; 1935 goto err5;
1943 1936
1944 pr_info("(C) 2000-2006 Netfilter Core Team\n");
1945 return 0; 1937 return 0;
1946 1938
1947err5: 1939err5:
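Annotation: both packet walkers (arpt_do_table() above and ipt_do_table() here) replace a plain load followed by smp_read_barrier_depends() with a single READ_ONCE(). Since the 4.15/4.16 memory-model work, READ_ONCE() itself provides the address-dependency ordering the explicit barrier used to document, so dereferences of private-> members are still ordered after the fetch of the base pointer. Side by side:

    	/* old: plain load, then an explicit dependency barrier */
    	private = table->private;
    	smp_read_barrier_depends();
    	table_base = private->entries;

    	/* new: READ_ONCE() carries the address dependency itself */
    	private = READ_ONCE(table->private);
    	table_base = private->entries;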
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 69060e3abe85..3a84a60f6b39 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -431,7 +431,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
431 struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; 431 struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
432 const struct ipt_entry *e = par->entryinfo; 432 const struct ipt_entry *e = par->entryinfo;
433 struct clusterip_config *config; 433 struct clusterip_config *config;
434 int ret; 434 int ret, i;
435 435
436 if (par->nft_compat) { 436 if (par->nft_compat) {
437 pr_err("cannot use CLUSTERIP target from nftables compat\n"); 437 pr_err("cannot use CLUSTERIP target from nftables compat\n");
@@ -450,8 +450,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
450 pr_info("Please specify destination IP\n"); 450 pr_info("Please specify destination IP\n");
451 return -EINVAL; 451 return -EINVAL;
452 } 452 }
453 453 if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
454 /* FIXME: further sanity checks */ 454 pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
455 return -EINVAL;
456 }
457 for (i = 0; i < cipinfo->num_local_nodes; i++) {
458 if (cipinfo->local_nodes[i] - 1 >=
459 sizeof(config->local_nodes) * 8) {
460 pr_info("bad local_nodes[%d] %u\n",
461 i, cipinfo->local_nodes[i]);
462 return -EINVAL;
463 }
464 }
455 465
456 config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); 466 config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
457 if (!config) { 467 if (!config) {
@@ -776,7 +786,6 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
776} 786}
777 787
778static const struct file_operations clusterip_proc_fops = { 788static const struct file_operations clusterip_proc_fops = {
779 .owner = THIS_MODULE,
780 .open = clusterip_proc_open, 789 .open = clusterip_proc_open,
781 .read = seq_read, 790 .read = seq_read,
782 .write = clusterip_proc_write, 791 .write = clusterip_proc_write,
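Annotation: the new CLUSTERIP checks close a hole where a crafted ruleset could index past the local_nodes bitmap: num_local_nodes is capped at the array size, and each 1-based node ID must map to a bit index below sizeof(config->local_nodes) * 8. The bound, reduced to a runnable miniature (a plain unsigned long stands in for the kernel bitmap):

    #include <stdio.h>

    int main(void)
    {
    	unsigned long nodes = 0;	/* stand-in for config->local_nodes */
    	unsigned int id = 65;		/* node IDs are 1-based */

    	/* Reject IDs whose bit index would fall outside the bitmap. */
    	if (id - 1 >= sizeof(nodes) * 8) {
    		printf("bad node id %u\n", id);
    		return 1;
    	}
    	nodes |= 1UL << (id - 1);
    	return 0;
    }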
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7667f223d7f8..9ac92ea7b93c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -38,12 +38,6 @@ static unsigned int
38iptable_filter_hook(void *priv, struct sk_buff *skb, 38iptable_filter_hook(void *priv, struct sk_buff *skb,
39 const struct nf_hook_state *state) 39 const struct nf_hook_state *state)
40{ 40{
41 if (state->hook == NF_INET_LOCAL_OUT &&
42 (skb->len < sizeof(struct iphdr) ||
43 ip_hdrlen(skb) < sizeof(struct iphdr)))
44 /* root is playing with raw sockets. */
45 return NF_ACCEPT;
46
47 return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); 41 return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
48} 42}
49 43
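Annotation: iptable_filter is the first of several hooks in this series (iptable_mangle, iptable_raw, iptable_security, and the conntrack and NAT LOCAL_OUT hooks below) to drop the same defensive test, presumably because the IPv4 raw-socket transmit path already refuses to queue packets with a truncated or undersized header, making the per-hook check redundant by the time LOCAL_OUT runs. The deleted test, for reference:

    	/* removed from each LOCAL_OUT hook: defensive check against
    	 * malformed headers injected via raw sockets */
    	if (state->hook == NF_INET_LOCAL_OUT &&
    	    (skb->len < sizeof(struct iphdr) ||
    	     ip_hdrlen(skb) < sizeof(struct iphdr)))
    		return NF_ACCEPT;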
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index aebdb337fd7e..dea138ca8925 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
49 u_int32_t mark; 49 u_int32_t mark;
50 int err; 50 int err;
51 51
52 /* root is playing with raw sockets. */
53 if (skb->len < sizeof(struct iphdr) ||
54 ip_hdrlen(skb) < sizeof(struct iphdr))
55 return NF_ACCEPT;
56
57 /* Save things which could affect route */ 52 /* Save things which could affect route */
58 mark = skb->mark; 53 mark = skb->mark;
59 iph = ip_hdr(skb); 54 iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a1a07b338ccf..0f7255cc65ee 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
72 { 72 {
73 .hook = iptable_nat_ipv4_in, 73 .hook = iptable_nat_ipv4_in,
74 .pf = NFPROTO_IPV4, 74 .pf = NFPROTO_IPV4,
75 .nat_hook = true,
75 .hooknum = NF_INET_PRE_ROUTING, 76 .hooknum = NF_INET_PRE_ROUTING,
76 .priority = NF_IP_PRI_NAT_DST, 77 .priority = NF_IP_PRI_NAT_DST,
77 }, 78 },
@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
79 { 80 {
80 .hook = iptable_nat_ipv4_out, 81 .hook = iptable_nat_ipv4_out,
81 .pf = NFPROTO_IPV4, 82 .pf = NFPROTO_IPV4,
83 .nat_hook = true,
82 .hooknum = NF_INET_POST_ROUTING, 84 .hooknum = NF_INET_POST_ROUTING,
83 .priority = NF_IP_PRI_NAT_SRC, 85 .priority = NF_IP_PRI_NAT_SRC,
84 }, 86 },
@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
86 { 88 {
87 .hook = iptable_nat_ipv4_local_fn, 89 .hook = iptable_nat_ipv4_local_fn,
88 .pf = NFPROTO_IPV4, 90 .pf = NFPROTO_IPV4,
91 .nat_hook = true,
89 .hooknum = NF_INET_LOCAL_OUT, 92 .hooknum = NF_INET_LOCAL_OUT,
90 .priority = NF_IP_PRI_NAT_DST, 93 .priority = NF_IP_PRI_NAT_DST,
91 }, 94 },
@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
93 { 96 {
94 .hook = iptable_nat_ipv4_fn, 97 .hook = iptable_nat_ipv4_fn,
95 .pf = NFPROTO_IPV4, 98 .pf = NFPROTO_IPV4,
99 .nat_hook = true,
96 .hooknum = NF_INET_LOCAL_IN, 100 .hooknum = NF_INET_LOCAL_IN,
97 .priority = NF_IP_PRI_NAT_SRC, 101 .priority = NF_IP_PRI_NAT_SRC,
98 }, 102 },
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 2642ecd2645c..960625aabf04 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 */ 5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6#include <linux/module.h> 7#include <linux/module.h>
7#include <linux/netfilter_ipv4/ip_tables.h> 8#include <linux/netfilter_ipv4/ip_tables.h>
8#include <linux/slab.h> 9#include <linux/slab.h>
@@ -12,6 +13,10 @@
12 13
13static int __net_init iptable_raw_table_init(struct net *net); 14static int __net_init iptable_raw_table_init(struct net *net);
14 15
16static bool raw_before_defrag __read_mostly;
17MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
18module_param(raw_before_defrag, bool, 0000);
19
15static const struct xt_table packet_raw = { 20static const struct xt_table packet_raw = {
16 .name = "raw", 21 .name = "raw",
17 .valid_hooks = RAW_VALID_HOOKS, 22 .valid_hooks = RAW_VALID_HOOKS,
@@ -21,17 +26,20 @@ static const struct xt_table packet_raw = {
21 .table_init = iptable_raw_table_init, 26 .table_init = iptable_raw_table_init,
22}; 27};
23 28
29static const struct xt_table packet_raw_before_defrag = {
30 .name = "raw",
31 .valid_hooks = RAW_VALID_HOOKS,
32 .me = THIS_MODULE,
33 .af = NFPROTO_IPV4,
34 .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
35 .table_init = iptable_raw_table_init,
36};
37
24/* The work comes in here from netfilter.c. */ 38/* The work comes in here from netfilter.c. */
25static unsigned int 39static unsigned int
26iptable_raw_hook(void *priv, struct sk_buff *skb, 40iptable_raw_hook(void *priv, struct sk_buff *skb,
27 const struct nf_hook_state *state) 41 const struct nf_hook_state *state)
28{ 42{
29 if (state->hook == NF_INET_LOCAL_OUT &&
30 (skb->len < sizeof(struct iphdr) ||
31 ip_hdrlen(skb) < sizeof(struct iphdr)))
32 /* root is playing with raw sockets. */
33 return NF_ACCEPT;
34
35 return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); 43 return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
36} 44}
37 45
@@ -40,15 +48,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
40static int __net_init iptable_raw_table_init(struct net *net) 48static int __net_init iptable_raw_table_init(struct net *net)
41{ 49{
42 struct ipt_replace *repl; 50 struct ipt_replace *repl;
51 const struct xt_table *table = &packet_raw;
43 int ret; 52 int ret;
44 53
54 if (raw_before_defrag)
55 table = &packet_raw_before_defrag;
56
45 if (net->ipv4.iptable_raw) 57 if (net->ipv4.iptable_raw)
46 return 0; 58 return 0;
47 59
48 repl = ipt_alloc_initial_table(&packet_raw); 60 repl = ipt_alloc_initial_table(table);
49 if (repl == NULL) 61 if (repl == NULL)
50 return -ENOMEM; 62 return -ENOMEM;
51 ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops, 63 ret = ipt_register_table(net, table, repl, rawtable_ops,
52 &net->ipv4.iptable_raw); 64 &net->ipv4.iptable_raw);
53 kfree(repl); 65 kfree(repl);
54 return ret; 66 return ret;
@@ -69,8 +81,15 @@ static struct pernet_operations iptable_raw_net_ops = {
69static int __init iptable_raw_init(void) 81static int __init iptable_raw_init(void)
70{ 82{
71 int ret; 83 int ret;
84 const struct xt_table *table = &packet_raw;
85
86 if (raw_before_defrag) {
87 table = &packet_raw_before_defrag;
88
89 pr_info("Enabling raw table before defrag\n");
90 }
72 91
73 rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook); 92 rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
74 if (IS_ERR(rawtable_ops)) 93 if (IS_ERR(rawtable_ops))
75 return PTR_ERR(rawtable_ops); 94 return PTR_ERR(rawtable_ops);
76 95
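Annotation: the new raw_before_defrag module parameter (for example, loading the module with raw_before_defrag=1) registers the raw table at a priority ahead of the defragmentation hook, so raw rules can see individual fragments and, for instance, mark flows NOTRACK before reassembly happens. For orientation, the relevant IPv4 hook priorities, lower runs first (assumed values from the netfilter uapi headers; verify against the tree):

    	NF_IP_PRI_RAW_BEFORE_DEFRAG = -450,	/* new in this series */
    	NF_IP_PRI_CONNTRACK_DEFRAG  = -400,
    	NF_IP_PRI_RAW               = -300,	/* the default raw table */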
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index ff226596e4b5..e5379fe57b64 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -43,12 +43,6 @@ static unsigned int
43iptable_security_hook(void *priv, struct sk_buff *skb, 43iptable_security_hook(void *priv, struct sk_buff *skb,
44 const struct nf_hook_state *state) 44 const struct nf_hook_state *state)
45{ 45{
46 if (state->hook == NF_INET_LOCAL_OUT &&
47 (skb->len < sizeof(struct iphdr) ||
48 ip_hdrlen(skb) < sizeof(struct iphdr)))
49 /* Somebody is playing with raw sockets. */
50 return NF_ACCEPT;
51
52 return ipt_do_table(skb, state, state->net->ipv4.iptable_security); 46 return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
53} 47}
54 48
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 89af9d88ca21..b50721d9d30e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local(void *priv,
154 struct sk_buff *skb, 154 struct sk_buff *skb,
155 const struct nf_hook_state *state) 155 const struct nf_hook_state *state)
156{ 156{
157 /* root is playing with raw sockets. */
158 if (skb->len < sizeof(struct iphdr) ||
159 ip_hdrlen(skb) < sizeof(struct iphdr))
160 return NF_ACCEPT;
161
162 if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ 157 if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
163 return NF_ACCEPT; 158 return NF_ACCEPT;
164 159
@@ -218,15 +213,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
218 struct nf_conntrack_tuple tuple; 213 struct nf_conntrack_tuple tuple;
219 214
220 memset(&tuple, 0, sizeof(tuple)); 215 memset(&tuple, 0, sizeof(tuple));
216
217 lock_sock(sk);
221 tuple.src.u3.ip = inet->inet_rcv_saddr; 218 tuple.src.u3.ip = inet->inet_rcv_saddr;
222 tuple.src.u.tcp.port = inet->inet_sport; 219 tuple.src.u.tcp.port = inet->inet_sport;
223 tuple.dst.u3.ip = inet->inet_daddr; 220 tuple.dst.u3.ip = inet->inet_daddr;
224 tuple.dst.u.tcp.port = inet->inet_dport; 221 tuple.dst.u.tcp.port = inet->inet_dport;
225 tuple.src.l3num = PF_INET; 222 tuple.src.l3num = PF_INET;
226 tuple.dst.protonum = sk->sk_protocol; 223 tuple.dst.protonum = sk->sk_protocol;
224 release_sock(sk);
227 225
228 /* We only do TCP and SCTP at the moment: is there a better way? */ 226 /* We only do TCP and SCTP at the moment: is there a better way? */
229 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { 227 if (tuple.dst.protonum != IPPROTO_TCP &&
228 tuple.dst.protonum != IPPROTO_SCTP) {
230 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 229 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
231 return -ENOPROTOOPT; 230 return -ENOPROTOOPT;
232 } 231 }
@@ -368,7 +367,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
368MODULE_ALIAS("ip_conntrack"); 367MODULE_ALIAS("ip_conntrack");
369MODULE_LICENSE("GPL"); 368MODULE_LICENSE("GPL");
370 369
371static struct nf_conntrack_l4proto *builtin_l4proto4[] = { 370static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
372 &nf_conntrack_l4proto_tcp4, 371 &nf_conntrack_l4proto_tcp4,
373 &nf_conntrack_l4proto_udp4, 372 &nf_conntrack_l4proto_udp4,
374 &nf_conntrack_l4proto_icmp, 373 &nf_conntrack_l4proto_icmp,
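Annotation: the getorigdst() change takes the socket lock while copying the address, port and protocol fields into the conntrack tuple, then validates the snapshot (tuple.dst.protonum) instead of re-reading sk->sk_protocol, which closes a race where the socket could change between the copy and the check. The pattern in miniature:

    	lock_sock(sk);			/* freeze the fields being sampled */
    	tuple.dst.protonum = sk->sk_protocol;
    	/* ... copy addresses and ports ... */
    	release_sock(sk);

    	/* validate the snapshot, never the live socket fields */
    	if (tuple.dst.protonum != IPPROTO_TCP &&
    	    tuple.dst.protonum != IPPROTO_SCTP)
    		return -ENOPROTOOPT;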
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 1849fedd9b81..5c15beafa711 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,7 +22,7 @@
22#include <net/netfilter/nf_conntrack_zones.h> 22#include <net/netfilter/nf_conntrack_zones.h>
23#include <net/netfilter/nf_log.h> 23#include <net/netfilter/nf_log.h>
24 24
25static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; 25static const unsigned int nf_ct_icmp_timeout = 30*HZ;
26 26
27static inline struct nf_icmp_net *icmp_pernet(struct net *net) 27static inline struct nf_icmp_net *icmp_pernet(struct net *net)
28{ 28{
@@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
351 return &net->ct.nf_ct_proto.icmp.pn; 351 return &net->ct.nf_ct_proto.icmp.pn;
352} 352}
353 353
354struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = 354const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
355{ 355{
356 .l3proto = PF_INET, 356 .l3proto = PF_INET,
357 .l4proto = IPPROTO_ICMP, 357 .l4proto = IPPROTO_ICMP,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 37fe1616ca0b..a0d3ad60a411 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -78,6 +78,8 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
78 if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) 78 if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
79 return NF_ACCEPT; 79 return NF_ACCEPT;
80#endif 80#endif
81 if (skb->_nfct == IP_CT_UNTRACKED)
82 return NF_ACCEPT;
81#endif 83#endif
82 /* Gather fragments. */ 84 /* Gather fragments. */
83 if (ip_is_fragment(ip_hdr(skb))) { 85 if (ip_is_fragment(ip_hdr(skb))) {
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
new file mode 100644
index 000000000000..25d2975da156
--- /dev/null
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -0,0 +1,285 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/module.h>
4#include <linux/netfilter.h>
5#include <linux/rhashtable.h>
6#include <linux/ip.h>
7#include <linux/netdevice.h>
8#include <net/ip.h>
9#include <net/neighbour.h>
10#include <net/netfilter/nf_flow_table.h>
11#include <net/netfilter/nf_tables.h>
12/* For layer 4 checksum field offset. */
13#include <linux/tcp.h>
14#include <linux/udp.h>
15
16static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
17 __be32 addr, __be32 new_addr)
18{
19 struct tcphdr *tcph;
20
21 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
22 skb_try_make_writable(skb, thoff + sizeof(*tcph)))
23 return -1;
24
25 tcph = (void *)(skb_network_header(skb) + thoff);
26 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
27
28 return 0;
29}
30
31static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
32 __be32 addr, __be32 new_addr)
33{
34 struct udphdr *udph;
35
36 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
37 skb_try_make_writable(skb, thoff + sizeof(*udph)))
38 return -1;
39
40 udph = (void *)(skb_network_header(skb) + thoff);
41 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
42 inet_proto_csum_replace4(&udph->check, skb, addr,
43 new_addr, true);
44 if (!udph->check)
45 udph->check = CSUM_MANGLED_0;
46 }
47
48 return 0;
49}
50
51static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
52 unsigned int thoff, __be32 addr,
53 __be32 new_addr)
54{
55 switch (iph->protocol) {
56 case IPPROTO_TCP:
57 if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
58 return NF_DROP;
59 break;
60 case IPPROTO_UDP:
61 if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
62 return NF_DROP;
63 break;
64 }
65
66 return 0;
67}
68
69static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
70 struct iphdr *iph, unsigned int thoff,
71 enum flow_offload_tuple_dir dir)
72{
73 __be32 addr, new_addr;
74
75 switch (dir) {
76 case FLOW_OFFLOAD_DIR_ORIGINAL:
77 addr = iph->saddr;
78 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
79 iph->saddr = new_addr;
80 break;
81 case FLOW_OFFLOAD_DIR_REPLY:
82 addr = iph->daddr;
83 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
84 iph->daddr = new_addr;
85 break;
86 default:
87 return -1;
88 }
89 csum_replace4(&iph->check, addr, new_addr);
90
91 return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
92}
93
94static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
95 struct iphdr *iph, unsigned int thoff,
96 enum flow_offload_tuple_dir dir)
97{
98 __be32 addr, new_addr;
99
100 switch (dir) {
101 case FLOW_OFFLOAD_DIR_ORIGINAL:
102 addr = iph->daddr;
103 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
104 iph->daddr = new_addr;
105 break;
106 case FLOW_OFFLOAD_DIR_REPLY:
107 addr = iph->saddr;
108 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
109 iph->saddr = new_addr;
110 break;
111 default:
112 return -1;
113 }
114
115 return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
116}
117
118static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
119 enum flow_offload_tuple_dir dir)
120{
121 struct iphdr *iph = ip_hdr(skb);
122 unsigned int thoff = iph->ihl * 4;
123
124 if (flow->flags & FLOW_OFFLOAD_SNAT &&
125 (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
126 nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
127 return -1;
128 if (flow->flags & FLOW_OFFLOAD_DNAT &&
129 (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
130 nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
131 return -1;
132
133 return 0;
134}
135
136static bool ip_has_options(unsigned int thoff)
137{
138 return thoff != sizeof(struct iphdr);
139}
140
141static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
142 struct flow_offload_tuple *tuple)
143{
144 struct flow_ports *ports;
145 unsigned int thoff;
146 struct iphdr *iph;
147
148 if (!pskb_may_pull(skb, sizeof(*iph)))
149 return -1;
150
151 iph = ip_hdr(skb);
152 thoff = iph->ihl * 4;
153
154 if (ip_is_fragment(iph) ||
155 unlikely(ip_has_options(thoff)))
156 return -1;
157
158 if (iph->protocol != IPPROTO_TCP &&
159 iph->protocol != IPPROTO_UDP)
160 return -1;
161
162 thoff = iph->ihl * 4;
163 if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
164 return -1;
165
166 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
167
168 tuple->src_v4.s_addr = iph->saddr;
169 tuple->dst_v4.s_addr = iph->daddr;
170 tuple->src_port = ports->source;
171 tuple->dst_port = ports->dest;
172 tuple->l3proto = AF_INET;
173 tuple->l4proto = iph->protocol;
174 tuple->iifidx = dev->ifindex;
175
176 return 0;
177}
178
179/* Based on ip_exceeds_mtu(). */
180static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
181{
182 if (skb->len <= mtu)
183 return false;
184
185 if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
186 return false;
187
188 if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
189 return false;
190
191 return true;
192}
193
194static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
195{
196 u32 mtu;
197
198 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
199 if (__nf_flow_exceeds_mtu(skb, mtu))
200 return true;
201
202 return false;
203}
204
205unsigned int
206nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
207 const struct nf_hook_state *state)
208{
209 struct flow_offload_tuple_rhash *tuplehash;
210 struct nf_flowtable *flow_table = priv;
211 struct flow_offload_tuple tuple = {};
212 enum flow_offload_tuple_dir dir;
213 struct flow_offload *flow;
214 struct net_device *outdev;
215 const struct rtable *rt;
216 struct iphdr *iph;
217 __be32 nexthop;
218
219 if (skb->protocol != htons(ETH_P_IP))
220 return NF_ACCEPT;
221
222 if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
223 return NF_ACCEPT;
224
225 tuplehash = flow_offload_lookup(flow_table, &tuple);
226 if (tuplehash == NULL)
227 return NF_ACCEPT;
228
229 outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
230 if (!outdev)
231 return NF_ACCEPT;
232
233 dir = tuplehash->tuple.dir;
234 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
235
236 rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
237 if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
238 return NF_ACCEPT;
239
240 if (skb_try_make_writable(skb, sizeof(*iph)))
241 return NF_DROP;
242
243 if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
244 nf_flow_nat_ip(flow, skb, dir) < 0)
245 return NF_DROP;
246
247 flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
248 iph = ip_hdr(skb);
249 ip_decrease_ttl(iph);
250
251 skb->dev = outdev;
252 nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
253 neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
254
255 return NF_STOLEN;
256}
257EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
258
259static struct nf_flowtable_type flowtable_ipv4 = {
260 .family = NFPROTO_IPV4,
261 .params = &nf_flow_offload_rhash_params,
262 .gc = nf_flow_offload_work_gc,
263 .free = nf_flow_table_free,
264 .hook = nf_flow_offload_ip_hook,
265 .owner = THIS_MODULE,
266};
267
268static int __init nf_flow_ipv4_module_init(void)
269{
270 nft_register_flowtable_type(&flowtable_ipv4);
271
272 return 0;
273}
274
275static void __exit nf_flow_ipv4_module_exit(void)
276{
277 nft_unregister_flowtable_type(&flowtable_ipv4);
278}
279
280module_init(nf_flow_ipv4_module_init);
281module_exit(nf_flow_ipv4_module_exit);
282
283MODULE_LICENSE("GPL");
284MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
285MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
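Annotation: nf_flow_offload_ip_hook() is the new IPv4 software fast path: parse a TCP/UDP tuple, look it up in the flowtable, optionally apply the recorded SNAT/DNAT, refresh the flow timeout, decrement the TTL and transmit straight to the neighbour, returning NF_STOLEN. Anything that does not qualify (fragments, IP options, non-TCP/UDP, oversized DF packets) falls through to the normal stack with NF_ACCEPT. The MTU gate, restated as a runnable userspace function (GSO case omitted):

    #include <stdbool.h>
    #include <stdio.h>

    /* Userspace rendition of __nf_flow_exceeds_mtu(): the fast path only
     * bypasses the stack when neither fragmentation nor an ICMP
     * frag-needed error would be required. */
    static bool exceeds_mtu(unsigned int len, bool df_set, unsigned int mtu)
    {
    	if (len <= mtu)
    		return false;
    	if (!df_set)
    		return false;		/* could be fragmented instead */
    	return true;
    }

    int main(void)
    {
    	printf("%d\n", exceeds_mtu(1600, true, 1500));	/* 1: punt to stack */
    	printf("%d\n", exceeds_mtu(1400, true, 1500));	/* 0: fast path ok */
    	return 0;
    }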
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 0443ca4120b0..f7ff6a364d7b 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -356,11 +356,6 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
356#endif 356#endif
357 unsigned int ret; 357 unsigned int ret;
358 358
359 /* root is playing with raw sockets. */
360 if (skb->len < sizeof(struct iphdr) ||
361 ip_hdrlen(skb) < sizeof(struct iphdr))
362 return NF_ACCEPT;
363
364 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); 359 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
365#ifdef CONFIG_XFRM 360#ifdef CONFIG_XFRM
366 if (ret != NF_DROP && ret != NF_STOLEN && 361 if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -396,11 +391,6 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
396 unsigned int ret; 391 unsigned int ret;
397 int err; 392 int err;
398 393
399 /* root is playing with raw sockets. */
400 if (skb->len < sizeof(struct iphdr) ||
401 ip_hdrlen(skb) < sizeof(struct iphdr))
402 return NF_ACCEPT;
403
404 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); 394 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
405 if (ret != NF_DROP && ret != NF_STOLEN && 395 if (ret != NF_DROP && ret != NF_STOLEN &&
406 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 396 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
new file mode 100644
index 000000000000..24b73268f362
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
@@ -0,0 +1,177 @@
1Message ::=
2 SEQUENCE {
3 version
4 INTEGER ({snmp_version}),
5
6 community
7 OCTET STRING,
8
9 pdu
10 PDUs
11 }
12
13
14ObjectName ::=
15 OBJECT IDENTIFIER
16
17ObjectSyntax ::=
18 CHOICE {
19 simple
20 SimpleSyntax,
21
22 application-wide
23 ApplicationSyntax
24 }
25
26SimpleSyntax ::=
27 CHOICE {
28 integer-value
29 INTEGER,
30
31 string-value
32 OCTET STRING,
33
34 objectID-value
35 OBJECT IDENTIFIER
36 }
37
38ApplicationSyntax ::=
39 CHOICE {
40 ipAddress-value
41 IpAddress,
42
43 counter-value
44 Counter32,
45
46 timeticks-value
47 TimeTicks,
48
49 arbitrary-value
50 Opaque,
51
52 big-counter-value
53 Counter64,
54
55 unsigned-integer-value
56 Unsigned32
57 }
58
59IpAddress ::=
60 [APPLICATION 0]
61 IMPLICIT OCTET STRING OPTIONAL ({snmp_helper})
62
63Counter32 ::=
64 [APPLICATION 1]
65 IMPLICIT INTEGER OPTIONAL
66
67Unsigned32 ::=
68 [APPLICATION 2]
69 IMPLICIT INTEGER OPTIONAL
70
71Gauge32 ::= Unsigned32 OPTIONAL
72
73TimeTicks ::=
74 [APPLICATION 3]
75 IMPLICIT INTEGER OPTIONAL
76
77Opaque ::=
78 [APPLICATION 4]
79 IMPLICIT OCTET STRING OPTIONAL
80
81Counter64 ::=
82 [APPLICATION 6]
83 IMPLICIT INTEGER OPTIONAL
84
85PDUs ::=
86 CHOICE {
87 get-request
88 GetRequest-PDU,
89
90 get-next-request
91 GetNextRequest-PDU,
92
93 get-bulk-request
94 GetBulkRequest-PDU,
95
96 response
97 Response-PDU,
98
99 set-request
100 SetRequest-PDU,
101
102 inform-request
103 InformRequest-PDU,
104
105 snmpV2-trap
106 SNMPv2-Trap-PDU,
107
108 report
109 Report-PDU
110 }
111
112GetRequest-PDU ::=
113 [0] IMPLICIT PDU OPTIONAL
114
115GetNextRequest-PDU ::=
116 [1] IMPLICIT PDU OPTIONAL
117
118Response-PDU ::=
119 [2] IMPLICIT PDU OPTIONAL
120
121SetRequest-PDU ::=
122 [3] IMPLICIT PDU OPTIONAL
123
124-- [4] is obsolete
125
126GetBulkRequest-PDU ::=
127 [5] IMPLICIT PDU OPTIONAL
128
129InformRequest-PDU ::=
130 [6] IMPLICIT PDU OPTIONAL
131
132SNMPv2-Trap-PDU ::=
133 [7] IMPLICIT PDU OPTIONAL
134
135Report-PDU ::=
136 [8] IMPLICIT PDU OPTIONAL
137
138PDU ::=
139 SEQUENCE {
140 request-id
141 INTEGER,
142
143 error-status
144 INTEGER,
145
146 error-index
147 INTEGER,
148
149 variable-bindings
150 VarBindList
151 }
152
153
154VarBind ::=
155 SEQUENCE {
156 name
157 ObjectName,
158
159 CHOICE {
160 value
161 ObjectSyntax,
162
163 unSpecified
164 NULL,
165
166 noSuchObject
167 [0] IMPLICIT NULL,
168
169 noSuchInstance
170 [1] IMPLICIT NULL,
171
172 endOfMibView
173 [2] IMPLICIT NULL
174 }
175}
176
177VarBindList ::= SEQUENCE OF VarBind
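Annotation: this grammar is fed to the kernel's ASN.1 compiler (wired up in the Makefile hunk above), which generates a bytecode decoder as nf_nat_snmp_basic-asn1.c, replacing the hand-rolled parser deleted below. The parenthesised names {snmp_version} and {snmp_helper} designate action callbacks the generated decoder invokes; they are implemented in nf_nat_snmp_basic_main.c. Assuming the standard asn1_action_t signature from the decoder API, the callbacks look like:

    	/* signature per asn1_action_t in the kernel's ASN.1 decoder API */
    	int snmp_version(void *context, size_t hdrlen, unsigned char tag,
    			 const void *data, size_t datalen);
    	int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
    			const void *data, size_t datalen);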
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
deleted file mode 100644
index d5b1e0b3f687..000000000000
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ /dev/null
@@ -1,1286 +0,0 @@
1/*
2 * nf_nat_snmp_basic.c
3 *
4 * Basic SNMP Application Layer Gateway
5 *
6 * This IP NAT module is intended for use with SNMP network
7 * discovery and monitoring applications where target networks use
8 * conflicting private address realms.
9 *
10 * Static NAT is used to remap the networks from the view of the network
11 * management system at the IP layer, and this module remaps some application
12 * layer addresses to match.
13 *
14 * The simplest form of ALG is performed, where only tagged IP addresses
15 * are modified. The module does not need to be MIB aware and only scans
16 * messages at the ASN.1/BER level.
17 *
18 * Currently, only SNMPv1 and SNMPv2 are supported.
19 *
20 * More information on ALG and associated issues can be found in
21 * RFC 2962
22 *
23 * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
24 * McLean & Jochen Friedrich, stripped down for use in the kernel.
25 *
26 * Copyright (c) 2000 RP Internet (www.rpi.net.au).
27 *
28 * This program is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published by
30 * the Free Software Foundation; either version 2 of the License, or
31 * (at your option) any later version.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 * You should have received a copy of the GNU General Public License
37 * along with this program; if not, see <http://www.gnu.org/licenses/>.
38 *
39 * Author: James Morris <jmorris@intercode.com.au>
40 *
41 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
42 */
43#include <linux/module.h>
44#include <linux/moduleparam.h>
45#include <linux/types.h>
46#include <linux/kernel.h>
47#include <linux/slab.h>
48#include <linux/in.h>
49#include <linux/ip.h>
50#include <linux/udp.h>
51#include <net/checksum.h>
52#include <net/udp.h>
53
54#include <net/netfilter/nf_nat.h>
55#include <net/netfilter/nf_conntrack_expect.h>
56#include <net/netfilter/nf_conntrack_helper.h>
57#include <net/netfilter/nf_nat_helper.h>
58#include <linux/netfilter/nf_conntrack_snmp.h>
59
60MODULE_LICENSE("GPL");
61MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
62MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
63MODULE_ALIAS("ip_nat_snmp_basic");
64
65#define SNMP_PORT 161
66#define SNMP_TRAP_PORT 162
67#define NOCT1(n) (*(u8 *)(n))
68
69static int debug;
70static DEFINE_SPINLOCK(snmp_lock);
71
72/*
73 * Application layer address mapping mimics the NAT mapping, but
74 * only for the first octet in this case (a more flexible system
75 * can be implemented if needed).
76 */
77struct oct1_map
78{
79 u_int8_t from;
80 u_int8_t to;
81};
82
83
84/*****************************************************************************
85 *
86 * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
87 *
88 *****************************************************************************/
89
90/* Class */
91#define ASN1_UNI 0 /* Universal */
92#define ASN1_APL 1 /* Application */
93#define ASN1_CTX 2 /* Context */
94#define ASN1_PRV 3 /* Private */
95
96/* Tag */
97#define ASN1_EOC 0 /* End Of Contents */
98#define ASN1_BOL 1 /* Boolean */
99#define ASN1_INT 2 /* Integer */
100#define ASN1_BTS 3 /* Bit String */
101#define ASN1_OTS 4 /* Octet String */
102#define ASN1_NUL 5 /* Null */
103#define ASN1_OJI 6 /* Object Identifier */
104#define ASN1_OJD 7 /* Object Description */
105#define ASN1_EXT 8 /* External */
106#define ASN1_SEQ 16 /* Sequence */
107#define ASN1_SET 17 /* Set */
108#define ASN1_NUMSTR 18 /* Numerical String */
109#define ASN1_PRNSTR 19 /* Printable String */
110#define ASN1_TEXSTR 20 /* Teletext String */
111#define ASN1_VIDSTR 21 /* Video String */
112#define ASN1_IA5STR 22 /* IA5 String */
113#define ASN1_UNITIM 23 /* Universal Time */
114#define ASN1_GENTIM 24 /* General Time */
115#define ASN1_GRASTR 25 /* Graphical String */
116#define ASN1_VISSTR 26 /* Visible String */
117#define ASN1_GENSTR 27 /* General String */
118
119/* Primitive / Constructed methods */
120#define ASN1_PRI 0 /* Primitive */
121#define ASN1_CON 1 /* Constructed */
122
123/*
124 * Error codes.
125 */
126#define ASN1_ERR_NOERROR 0
127#define ASN1_ERR_DEC_EMPTY 2
128#define ASN1_ERR_DEC_EOC_MISMATCH 3
129#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
130#define ASN1_ERR_DEC_BADVALUE 5
131
132/*
133 * ASN.1 context.
134 */
135struct asn1_ctx
136{
137 int error; /* Error condition */
138 unsigned char *pointer; /* Next octet to be decoded */
139 unsigned char *begin; /* First octet */
140 unsigned char *end; /* Octet after last octet */
141};
142
143/*
144 * Octet string (not null terminated)
145 */
146struct asn1_octstr
147{
148 unsigned char *data;
149 unsigned int len;
150};
151
152static void asn1_open(struct asn1_ctx *ctx,
153 unsigned char *buf,
154 unsigned int len)
155{
156 ctx->begin = buf;
157 ctx->end = buf + len;
158 ctx->pointer = buf;
159 ctx->error = ASN1_ERR_NOERROR;
160}
161
162static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
163{
164 if (ctx->pointer >= ctx->end) {
165 ctx->error = ASN1_ERR_DEC_EMPTY;
166 return 0;
167 }
168 *ch = *(ctx->pointer)++;
169 return 1;
170}
171
172static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
173{
174 unsigned char ch;
175
176 *tag = 0;
177
178 do
179 {
180 if (!asn1_octet_decode(ctx, &ch))
181 return 0;
182 *tag <<= 7;
183 *tag |= ch & 0x7F;
184 } while ((ch & 0x80) == 0x80);
185 return 1;
186}
187
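A worked example for the loop above (illustration only): in BER's high-tag-number form each octet carries seven payload bits, and a set top bit means another octet follows.

/* Octets 0x87 0x68: the first has its top bit set, so decoding continues:
 *   tag = (0x87 & 0x7F) = 7, then tag = (7 << 7) | (0x68 & 0x7F) = 1000. */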
188static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
189 unsigned int *cls,
190 unsigned int *con,
191 unsigned int *tag)
192{
193 unsigned char ch;
194
195 if (!asn1_octet_decode(ctx, &ch))
196 return 0;
197
198 *cls = (ch & 0xC0) >> 6;
199 *con = (ch & 0x20) >> 5;
200 *tag = (ch & 0x1F);
201
202 if (*tag == 0x1F) {
203 if (!asn1_tag_decode(ctx, tag))
204 return 0;
205 }
206 return 1;
207}
208
209static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
210 unsigned int *def,
211 unsigned int *len)
212{
213 unsigned char ch, cnt;
214
215 if (!asn1_octet_decode(ctx, &ch))
216 return 0;
217
218 if (ch == 0x80)
219 *def = 0;
220 else {
221 *def = 1;
222
223 if (ch < 0x80)
224 *len = ch;
225 else {
226 cnt = ch & 0x7F;
227 *len = 0;
228
229 while (cnt > 0) {
230 if (!asn1_octet_decode(ctx, &ch))
231 return 0;
232 *len <<= 8;
233 *len |= ch;
234 cnt--;
235 }
236 }
237 }
238
239 /* don't trust len bigger than ctx buffer */
240 if (*len > ctx->end - ctx->pointer)
241 return 0;
242
243 return 1;
244}
245
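Worked examples for the three BER length forms handled above (illustration only):

/*   0x2D            short form:      def = 1, len = 45
 *   0x82 0x01 0xB3  long form:       def = 1, len = (0x01 << 8) | 0xB3 = 435
 *   0x80            indefinite form: def = 0, content runs to an EOC (00 00)
 * The final bounds check then rejects any definite length that would
 * overrun the message buffer. */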
246static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
247 unsigned char **eoc,
248 unsigned int *cls,
249 unsigned int *con,
250 unsigned int *tag)
251{
252 unsigned int def, len;
253
254 if (!asn1_id_decode(ctx, cls, con, tag))
255 return 0;
256
257 def = len = 0;
258 if (!asn1_length_decode(ctx, &def, &len))
259 return 0;
260
261 /* primitive shall be definite, indefinite shall be constructed */
262 if (*con == ASN1_PRI && !def)
263 return 0;
264
265 if (def)
266 *eoc = ctx->pointer + len;
267 else
268 *eoc = NULL;
269 return 1;
270}
271
272static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
273{
274 unsigned char ch;
275
276 if (eoc == NULL) {
277 if (!asn1_octet_decode(ctx, &ch))
278 return 0;
279
280 if (ch != 0x00) {
281 ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
282 return 0;
283 }
284
285 if (!asn1_octet_decode(ctx, &ch))
286 return 0;
287
288 if (ch != 0x00) {
289 ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
290 return 0;
291 }
292 return 1;
293 } else {
294 if (ctx->pointer != eoc) {
295 ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
296 return 0;
297 }
298 return 1;
299 }
300}
301
302static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
303{
304 ctx->pointer = eoc;
305 return 1;
306}
307
308static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
309 unsigned char *eoc,
310 long *integer)
311{
312 unsigned char ch;
313 unsigned int len;
314
315 if (!asn1_octet_decode(ctx, &ch))
316 return 0;
317
318 *integer = (signed char) ch;
319 len = 1;
320
321 while (ctx->pointer < eoc) {
322 if (++len > sizeof (long)) {
323 ctx->error = ASN1_ERR_DEC_BADVALUE;
324 return 0;
325 }
326
327 if (!asn1_octet_decode(ctx, &ch))
328 return 0;
329
330 *integer <<= 8;
331 *integer |= ch;
332 }
333 return 1;
334}
335
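The cast to signed char is what makes negative INTEGERs come out right, since BER integer content is big-endian two's complement; worked examples (illustration only):

/*   0x05       ->    5
 *   0xFE       ->   -2    ((signed char)0xFE seeds the sign)
 *   0xFF 0x38  -> -200    ((-1 << 8) | 0x38)
 * Anything wider than sizeof(long) fails with ASN1_ERR_DEC_BADVALUE. */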
336static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
337 unsigned char *eoc,
338 unsigned int *integer)
339{
340 unsigned char ch;
341 unsigned int len;
342
343 if (!asn1_octet_decode(ctx, &ch))
344 return 0;
345
346 *integer = ch;
347 if (ch == 0) len = 0;
348 else len = 1;
349
350 while (ctx->pointer < eoc) {
351 if (++len > sizeof (unsigned int)) {
352 ctx->error = ASN1_ERR_DEC_BADVALUE;
353 return 0;
354 }
355
356 if (!asn1_octet_decode(ctx, &ch))
357 return 0;
358
359 *integer <<= 8;
360 *integer |= ch;
361 }
362 return 1;
363}
364
365static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
366 unsigned char *eoc,
367 unsigned long *integer)
368{
369 unsigned char ch;
370 unsigned int len;
371
372 if (!asn1_octet_decode(ctx, &ch))
373 return 0;
374
375 *integer = ch;
376 if (ch == 0) len = 0;
377 else len = 1;
378
379 while (ctx->pointer < eoc) {
380 if (++len > sizeof (unsigned long)) {
381 ctx->error = ASN1_ERR_DEC_BADVALUE;
382 return 0;
383 }
384
385 if (!asn1_octet_decode(ctx, &ch))
386 return 0;
387
388 *integer <<= 8;
389 *integer |= ch;
390 }
391 return 1;
392}
393
394static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
395 unsigned char *eoc,
396 unsigned char **octets,
397 unsigned int *len)
398{
399 unsigned char *ptr;
400
401 *len = 0;
402
403 *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
404 if (*octets == NULL)
405 return 0;
406
407 ptr = *octets;
408 while (ctx->pointer < eoc) {
409 if (!asn1_octet_decode(ctx, ptr++)) {
410 kfree(*octets);
411 *octets = NULL;
412 return 0;
413 }
414 (*len)++;
415 }
416 return 1;
417}
418
419static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
420 unsigned long *subid)
421{
422 unsigned char ch;
423
424 *subid = 0;
425
426 do {
427 if (!asn1_octet_decode(ctx, &ch))
428 return 0;
429
430 *subid <<= 7;
431 *subid |= ch & 0x7F;
432 } while ((ch & 0x80) == 0x80);
433 return 1;
434}
435
436static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
437 unsigned char *eoc,
438 unsigned long **oid,
439 unsigned int *len)
440{
441 unsigned long subid;
442 unsigned long *optr;
443 size_t size;
444
445 size = eoc - ctx->pointer + 1;
446
447 /* first subid actually encodes first two subids */
448 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
449 return 0;
450
451 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
452 if (*oid == NULL)
453 return 0;
454
455 optr = *oid;
456
457 if (!asn1_subid_decode(ctx, &subid)) {
458 kfree(*oid);
459 *oid = NULL;
460 return 0;
461 }
462
463 if (subid < 40) {
464 optr[0] = 0;
465 optr[1] = subid;
466 } else if (subid < 80) {
467 optr[0] = 1;
468 optr[1] = subid - 40;
469 } else {
470 optr[0] = 2;
471 optr[1] = subid - 80;
472 }
473
474 *len = 2;
475 optr += 2;
476
477 while (ctx->pointer < eoc) {
478 if (++(*len) > size) {
479 ctx->error = ASN1_ERR_DEC_BADVALUE;
480 kfree(*oid);
481 *oid = NULL;
482 return 0;
483 }
484
485 if (!asn1_subid_decode(ctx, optr++)) {
486 kfree(*oid);
487 *oid = NULL;
488 return 0;
489 }
490 }
491 return 1;
492}
493
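The three-way split above follows the BER rule that the first encoded sub-identifier packs the first two OID arcs as X*40 + Y; a worked example (illustration only):

/* 0x2B 0x06 0x01 0x02 0x01 decodes to 1.3.6.1.2.1 (mib-2): 0x2B = 43 =
 * 1*40 + 3 yields the arcs 1.3, and the remaining octets use the same
 * base-128 scheme as asn1_subid_decode(). */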
494/*****************************************************************************
495 *
496 * SNMP decoding routines (gxsnmp author Dirk Wisse)
497 *
498 *****************************************************************************/
499
500/* SNMP Versions */
501#define SNMP_V1 0
502#define SNMP_V2C 1
503#define SNMP_V2 2
504#define SNMP_V3 3
505
506/* Default Sizes */
507#define SNMP_SIZE_COMM 256
508#define SNMP_SIZE_OBJECTID 128
509#define SNMP_SIZE_BUFCHR 256
510#define SNMP_SIZE_BUFINT 128
511#define SNMP_SIZE_SMALLOBJECTID 16
512
513/* Requests */
514#define SNMP_PDU_GET 0
515#define SNMP_PDU_NEXT 1
516#define SNMP_PDU_RESPONSE 2
517#define SNMP_PDU_SET 3
518#define SNMP_PDU_TRAP1 4
519#define SNMP_PDU_BULK 5
520#define SNMP_PDU_INFORM 6
521#define SNMP_PDU_TRAP2 7
522
523/* Errors */
524#define SNMP_NOERROR 0
525#define SNMP_TOOBIG 1
526#define SNMP_NOSUCHNAME 2
527#define SNMP_BADVALUE 3
528#define SNMP_READONLY 4
529#define SNMP_GENERROR 5
530#define SNMP_NOACCESS 6
531#define SNMP_WRONGTYPE 7
532#define SNMP_WRONGLENGTH 8
533#define SNMP_WRONGENCODING 9
534#define SNMP_WRONGVALUE 10
535#define SNMP_NOCREATION 11
536#define SNMP_INCONSISTENTVALUE 12
537#define SNMP_RESOURCEUNAVAILABLE 13
538#define SNMP_COMMITFAILED 14
539#define SNMP_UNDOFAILED 15
540#define SNMP_AUTHORIZATIONERROR 16
541#define SNMP_NOTWRITABLE 17
542#define SNMP_INCONSISTENTNAME 18
543
544/* General SNMP V1 Traps */
545#define SNMP_TRAP_COLDSTART 0
546#define SNMP_TRAP_WARMSTART 1
547#define SNMP_TRAP_LINKDOWN 2
548#define SNMP_TRAP_LINKUP 3
549#define SNMP_TRAP_AUTFAILURE 4
550#define SNMP_TRAP_EQPNEIGHBORLOSS 5
551#define SNMP_TRAP_ENTSPECIFIC 6
552
553/* SNMPv1 Types */
554#define SNMP_NULL 0
555#define SNMP_INTEGER 1 /* l */
556#define SNMP_OCTETSTR 2 /* c */
557#define SNMP_DISPLAYSTR 2 /* c */
558#define SNMP_OBJECTID 3 /* ul */
559#define SNMP_IPADDR 4 /* uc */
560#define SNMP_COUNTER 5 /* ul */
561#define SNMP_GAUGE 6 /* ul */
562#define SNMP_TIMETICKS 7 /* ul */
563#define SNMP_OPAQUE 8 /* c */
564
565/* Additional SNMPv2 Types */
566#define SNMP_UINTEGER 5 /* ul */
567#define SNMP_BITSTR 9 /* uc */
568#define SNMP_NSAP 10 /* uc */
569#define SNMP_COUNTER64 11 /* ul */
570#define SNMP_NOSUCHOBJECT 12
571#define SNMP_NOSUCHINSTANCE 13
572#define SNMP_ENDOFMIBVIEW 14
573
574union snmp_syntax
575{
576 unsigned char uc[0]; /* 8 bit unsigned */
577 char c[0]; /* 8 bit signed */
578 unsigned long ul[0]; /* 32 bit unsigned */
579 long l[0]; /* 32 bit signed */
580};
581
582struct snmp_object
583{
584 unsigned long *id;
585 unsigned int id_len;
586 unsigned short type;
587 unsigned int syntax_len;
588 union snmp_syntax syntax;
589};
590
591struct snmp_request
592{
593 unsigned long id;
594 unsigned int error_status;
595 unsigned int error_index;
596};
597
598struct snmp_v1_trap
599{
600 unsigned long *id;
601 unsigned int id_len;
602 unsigned long ip_address; /* pointer to 4-octet address */
603 unsigned int general;
604 unsigned int specific;
605 unsigned long time;
606};
607
608/* SNMP types */
609#define SNMP_IPA 0
610#define SNMP_CNT 1
611#define SNMP_GGE 2
612#define SNMP_TIT 3
613#define SNMP_OPQ 4
614#define SNMP_C64 6
615
616/* SNMP errors */
617#define SERR_NSO 0
618#define SERR_NSI 1
619#define SERR_EOM 2
620
621static inline void mangle_address(unsigned char *begin,
622 unsigned char *addr,
623 const struct oct1_map *map,
624 __sum16 *check);
625struct snmp_cnv
626{
627 unsigned int class;
628 unsigned int tag;
629 int syntax;
630};
631
632static const struct snmp_cnv snmp_conv[] = {
633 {ASN1_UNI, ASN1_NUL, SNMP_NULL},
634 {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
635 {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
636 {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
637 {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
638 {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
639 {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
640 {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
641 {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
642 {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
643
644 /* SNMPv2 data types and errors */
645 {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
646 {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
647 {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
648 {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
649 {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
650 {0, 0, -1}
651};
652
653static unsigned char snmp_tag_cls2syntax(unsigned int tag,
654 unsigned int cls,
655 unsigned short *syntax)
656{
657 const struct snmp_cnv *cnv;
658
659 cnv = snmp_conv;
660
661 while (cnv->syntax != -1) {
662 if (cnv->tag == tag && cnv->class == cls) {
663 *syntax = cnv->syntax;
664 return 1;
665 }
666 cnv++;
667 }
668 return 0;
669}
670
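A usage sketch for the lookup above (illustration only): an SNMP IpAddress is encoded with class APPLICATION and tag 0, so

/* snmp_tag_cls2syntax(SNMP_IPA, ASN1_APL, &type) succeeds with
 * type == SNMP_IPADDR, which later steers snmp_object_decode() toward
 * the 4-octet branch that gets mangled. An unknown (class, tag) pair
 * returns 0 and aborts the object decode. */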
671static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
672 struct snmp_object **obj)
673{
674 unsigned int cls, con, tag, len, idlen;
675 unsigned short type;
676 unsigned char *eoc, *end, *p;
677 unsigned long *lp, *id;
678 unsigned long ul;
679 long l;
680
681 *obj = NULL;
682 id = NULL;
683
684 if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
685 return 0;
686
687 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
688 return 0;
689
690 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
691 return 0;
692
693 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
694 return 0;
695
696 if (!asn1_oid_decode(ctx, end, &id, &idlen))
697 return 0;
698
699 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
700 kfree(id);
701 return 0;
702 }
703
704 if (con != ASN1_PRI) {
705 kfree(id);
706 return 0;
707 }
708
709 type = 0;
710 if (!snmp_tag_cls2syntax(tag, cls, &type)) {
711 kfree(id);
712 return 0;
713 }
714
715 l = 0;
716 switch (type) {
717 case SNMP_INTEGER:
718 len = sizeof(long);
719 if (!asn1_long_decode(ctx, end, &l)) {
720 kfree(id);
721 return 0;
722 }
723 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
724 if (*obj == NULL) {
725 kfree(id);
726 return 0;
727 }
728 (*obj)->syntax.l[0] = l;
729 break;
730 case SNMP_OCTETSTR:
731 case SNMP_OPAQUE:
732 if (!asn1_octets_decode(ctx, end, &p, &len)) {
733 kfree(id);
734 return 0;
735 }
736 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
737 if (*obj == NULL) {
738 kfree(p);
739 kfree(id);
740 return 0;
741 }
742 memcpy((*obj)->syntax.c, p, len);
743 kfree(p);
744 break;
745 case SNMP_NULL:
746 case SNMP_NOSUCHOBJECT:
747 case SNMP_NOSUCHINSTANCE:
748 case SNMP_ENDOFMIBVIEW:
749 len = 0;
750 *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
751 if (*obj == NULL) {
752 kfree(id);
753 return 0;
754 }
755 if (!asn1_null_decode(ctx, end)) {
756 kfree(id);
757 kfree(*obj);
758 *obj = NULL;
759 return 0;
760 }
761 break;
762 case SNMP_OBJECTID:
763 if (!asn1_oid_decode(ctx, end, &lp, &len)) {
764 kfree(id);
765 return 0;
766 }
767 len *= sizeof(unsigned long);
768 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
769 if (*obj == NULL) {
770 kfree(lp);
771 kfree(id);
772 return 0;
773 }
774 memcpy((*obj)->syntax.ul, lp, len);
775 kfree(lp);
776 break;
777 case SNMP_IPADDR:
778 if (!asn1_octets_decode(ctx, end, &p, &len)) {
779 kfree(id);
780 return 0;
781 }
782 if (len != 4) {
783 kfree(p);
784 kfree(id);
785 return 0;
786 }
787 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
788 if (*obj == NULL) {
789 kfree(p);
790 kfree(id);
791 return 0;
792 }
793 memcpy((*obj)->syntax.uc, p, len);
794 kfree(p);
795 break;
796 case SNMP_COUNTER:
797 case SNMP_GAUGE:
798 case SNMP_TIMETICKS:
799 len = sizeof(unsigned long);
800 if (!asn1_ulong_decode(ctx, end, &ul)) {
801 kfree(id);
802 return 0;
803 }
804 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
805 if (*obj == NULL) {
806 kfree(id);
807 return 0;
808 }
809 (*obj)->syntax.ul[0] = ul;
810 break;
811 default:
812 kfree(id);
813 return 0;
814 }
815
816 (*obj)->syntax_len = len;
817 (*obj)->type = type;
818 (*obj)->id = id;
819 (*obj)->id_len = idlen;
820
821 if (!asn1_eoc_decode(ctx, eoc)) {
822 kfree(id);
823 kfree(*obj);
824 *obj = NULL;
825 return 0;
826 }
827 return 1;
828}
829
830static unsigned char noinline_for_stack
831snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request)
832{
833 unsigned int cls, con, tag;
834 unsigned char *end;
835
836 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
837 return 0;
838
839 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
840 return 0;
841
842 if (!asn1_ulong_decode(ctx, end, &request->id))
843 return 0;
844
845 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
846 return 0;
847
848 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
849 return 0;
850
851 if (!asn1_uint_decode(ctx, end, &request->error_status))
852 return 0;
853
854 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
855 return 0;
856
857 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
858 return 0;
859
860 if (!asn1_uint_decode(ctx, end, &request->error_index))
861 return 0;
862
863 return 1;
864}
865
866/*
867 * Fast checksum update for possibly oddly-aligned UDP byte, from the
868 * code example in the draft.
869 */
870static void fast_csum(__sum16 *csum,
871 const unsigned char *optr,
872 const unsigned char *nptr,
873 int offset)
874{
875 unsigned char s[4];
876
877 if (offset & 1) {
878 s[0] = ~0;
879 s[1] = ~*optr;
880 s[2] = 0;
881 s[3] = *nptr;
882 } else {
883 s[0] = ~*optr;
884 s[1] = ~0;
885 s[2] = *nptr;
886 s[3] = 0;
887 }
888
889 *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
890}
891
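Why the scratch buffer is laid out this way (a sketch of the reasoning, following the incremental-update rule HC' = ~(~HC + ~m + m') of RFC 1624):

/* The Internet checksum sums 16-bit words, so a one-byte change must be
 * applied in the byte's real position within its word. At an even offset
 * the byte is a word's high octet (s[0]/s[2] carry ~old and new); at an
 * odd offset it is the low octet (s[1]/s[3]). The ~0 and 0 fillers for
 * the untouched octet sum to 0xFF, i.e. minus zero, and drop out. */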
892/*
893 * Mangle IP address.
894 * - begin points to the start of the SNMP message
895 * - addr points to the start of the address
896 */
897static inline void mangle_address(unsigned char *begin,
898 unsigned char *addr,
899 const struct oct1_map *map,
900 __sum16 *check)
901{
902 if (map->from == NOCT1(addr)) {
903 u_int32_t old;
904
905 if (debug)
906 memcpy(&old, addr, sizeof(old));
907
908 *addr = map->to;
909
910 /* Update UDP checksum if being used */
911 if (*check) {
912 fast_csum(check,
913 &map->from, &map->to, addr - begin);
914
915 }
916
917 if (debug)
918 printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n",
919 &old, addr);
920 }
921}
922
923static unsigned char noinline_for_stack
924snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap,
925 const struct oct1_map *map,
926 __sum16 *check)
927{
928 unsigned int cls, con, tag, len;
929 unsigned char *end;
930
931 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
932 return 0;
933
934 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
935 return 0;
936
937 if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
938 return 0;
939
940 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
941 goto err_id_free;
942
943 if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
944 (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
945 goto err_id_free;
946
947 if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
948 goto err_id_free;
949
950 /* IPv4 only */
951 if (len != 4)
952 goto err_addr_free;
953
954 mangle_address(ctx->begin, ctx->pointer - 4, map, check);
955
956 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
957 goto err_addr_free;
958
959 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
960 goto err_addr_free;
961
962 if (!asn1_uint_decode(ctx, end, &trap->general))
963 goto err_addr_free;
964
965 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
966 goto err_addr_free;
967
968 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
969 goto err_addr_free;
970
971 if (!asn1_uint_decode(ctx, end, &trap->specific))
972 goto err_addr_free;
973
974 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
975 goto err_addr_free;
976
977 if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
978 (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
979 goto err_addr_free;
980
981 if (!asn1_ulong_decode(ctx, end, &trap->time))
982 goto err_addr_free;
983
984 return 1;
985
986err_addr_free:
987 kfree((unsigned long *)trap->ip_address);
988
989err_id_free:
990 kfree(trap->id);
991
992 return 0;
993}
994
995/*****************************************************************************
996 *
997 * Misc. routines
998 *
999 *****************************************************************************/
1000
1001/*
1002 * Parse and mangle SNMP message according to mapping.
1003 * (And this is just the 'basic' method).
1004 */
1005static int snmp_parse_mangle(unsigned char *msg,
1006 u_int16_t len,
1007 const struct oct1_map *map,
1008 __sum16 *check)
1009{
1010 unsigned char *eoc, *end;
1011 unsigned int cls, con, tag, vers, pdutype;
1012 struct asn1_ctx ctx;
1013 struct asn1_octstr comm;
1014 struct snmp_object *obj;
1015
1016 if (debug > 1)
1017 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
1018 msg, len, 0);
1019
1020 asn1_open(&ctx, msg, len);
1021
1022 /*
1023 * Start of SNMP message.
1024 */
1025 if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
1026 return 0;
1027 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
1028 return 0;
1029
1030 /*
1031 * Version 1 or 2 handled.
1032 */
1033 if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
1034 return 0;
1035 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
1036 return 0;
1037 if (!asn1_uint_decode (&ctx, end, &vers))
1038 return 0;
1039 if (debug > 1)
1040 pr_debug("bsalg: snmp version: %u\n", vers + 1);
1041 if (vers > 1)
1042 return 1;
1043
1044 /*
1045 * Community.
1046 */
1047 if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
1048 return 0;
1049 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
1050 return 0;
1051 if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
1052 return 0;
1053 if (debug > 1) {
1054 unsigned int i;
1055
1056 pr_debug("bsalg: community: ");
1057 for (i = 0; i < comm.len; i++)
1058 pr_cont("%c", comm.data[i]);
1059 pr_cont("\n");
1060 }
1061 kfree(comm.data);
1062
1063 /*
1064 * PDU type
1065 */
1066 if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
1067 return 0;
1068 if (cls != ASN1_CTX || con != ASN1_CON)
1069 return 0;
1070 if (debug > 1) {
1071 static const unsigned char *const pdus[] = {
1072 [SNMP_PDU_GET] = "get",
1073 [SNMP_PDU_NEXT] = "get-next",
1074 [SNMP_PDU_RESPONSE] = "response",
1075 [SNMP_PDU_SET] = "set",
1076 [SNMP_PDU_TRAP1] = "trapv1",
1077 [SNMP_PDU_BULK] = "bulk",
1078 [SNMP_PDU_INFORM] = "inform",
1079 [SNMP_PDU_TRAP2] = "trapv2"
1080 };
1081
1082 if (pdutype > SNMP_PDU_TRAP2)
1083 pr_debug("bsalg: bad pdu type %u\n", pdutype);
1084 else
1085 pr_debug("bsalg: pdu: %s\n", pdus[pdutype]);
1086 }
1087 if (pdutype != SNMP_PDU_RESPONSE &&
1088 pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
1089 return 1;
1090
1091 /*
1092 * Request header or v1 trap
1093 */
1094 if (pdutype == SNMP_PDU_TRAP1) {
1095 struct snmp_v1_trap trap;
1096 unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
1097
1098 if (ret) {
1099 kfree(trap.id);
1100 kfree((unsigned long *)trap.ip_address);
1101 } else
1102 return ret;
1103
1104 } else {
1105 struct snmp_request req;
1106
1107 if (!snmp_request_decode(&ctx, &req))
1108 return 0;
1109
1110 if (debug > 1)
1111 pr_debug("bsalg: request: id=0x%lx error_status=%u "
1112 "error_index=%u\n", req.id, req.error_status,
1113 req.error_index);
1114 }
1115
1116 /*
1117 * Loop through objects, look for IP addresses to mangle.
1118 */
1119 if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
1120 return 0;
1121
1122 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
1123 return 0;
1124
1125 while (!asn1_eoc_decode(&ctx, eoc)) {
1126 unsigned int i;
1127
1128 if (!snmp_object_decode(&ctx, &obj)) {
1129 if (obj) {
1130 kfree(obj->id);
1131 kfree(obj);
1132 }
1133 return 0;
1134 }
1135
1136 if (debug > 1) {
1137 pr_debug("bsalg: object: ");
1138 for (i = 0; i < obj->id_len; i++) {
1139 if (i > 0)
1140 pr_cont(".");
1141 pr_cont("%lu", obj->id[i]);
1142 }
1143 pr_cont(": type=%u\n", obj->type);
1144
1145 }
1146
1147 if (obj->type == SNMP_IPADDR)
1148 mangle_address(ctx.begin, ctx.pointer - 4, map, check);
1149
1150 kfree(obj->id);
1151 kfree(obj);
1152 }
1153
1154 if (!asn1_eoc_decode(&ctx, eoc))
1155 return 0;
1156
1157 return 1;
1158}
1159
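For orientation, the message shape the parser above walks, in BER outline form (illustration only):

/* SEQUENCE {                      -- whole SNMP message
 *   INTEGER version               -- 0 = SNMPv1, 1 = SNMPv2c
 *   OCTET STRING community
 *   [context] PDU {               -- only response/trapv1/trapv2 mangled,
 *     request-id or v1 trap hdr      other PDU types are accepted as-is
 *     SEQUENCE OF SEQUENCE {      -- variable bindings
 *       OBJECT IDENTIFIER name
 *       value                     -- IpAddress values get remapped
 *     }
 *   }
 * } */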
1160/*****************************************************************************
1161 *
1162 * NAT routines.
1163 *
1164 *****************************************************************************/
1165
1166/*
1167 * SNMP translation routine.
1168 */
1169static int snmp_translate(struct nf_conn *ct,
1170 enum ip_conntrack_info ctinfo,
1171 struct sk_buff *skb)
1172{
1173 struct iphdr *iph = ip_hdr(skb);
1174 struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
1175 u_int16_t udplen = ntohs(udph->len);
1176 u_int16_t paylen = udplen - sizeof(struct udphdr);
1177 int dir = CTINFO2DIR(ctinfo);
1178 struct oct1_map map;
1179
1180 /*
1181 * Determine mapping for application layer addresses based
1182 * on NAT manipulations for the packet.
1183 */
1184 if (dir == IP_CT_DIR_ORIGINAL) {
1185 /* SNAT traps */
1186 map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
1187 map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
1188 } else {
1189 /* DNAT replies */
1190 map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
1191 map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
1192 }
1193
1194 if (map.from == map.to)
1195 return NF_ACCEPT;
1196
1197 if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
1198 paylen, &map, &udph->check)) {
1199 net_warn_ratelimited("bsalg: parser failed\n");
1200 return NF_DROP;
1201 }
1202 return NF_ACCEPT;
1203}
1204
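The direction logic in concrete terms (illustration only, for the one case spelled out here): a trap from a private agent that was SNATed on the way out travels in the original direction, where the original tuple's source holds the private address and the reply tuple's destination holds its public mapping.

/* agent 10.a.b.c SNATed to 172.a.b.c, trap in IP_CT_DIR_ORIGINAL:
 *   map.from = NOCT1(orig src)  = 10
 *   map.to   = NOCT1(reply dst) = 172
 * so embedded 10.x.y.z payload addresses are rewritten to 172.x.y.z.
 * When the two octets agree there is no realm conflict and the packet
 * passes through untouched. */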
1205/* We don't actually set up expectations, just adjust internal IP
1206 * addresses if this is being NATted */
1207static int help(struct sk_buff *skb, unsigned int protoff,
1208 struct nf_conn *ct,
1209 enum ip_conntrack_info ctinfo)
1210{
1211 int dir = CTINFO2DIR(ctinfo);
1212 unsigned int ret;
1213 const struct iphdr *iph = ip_hdr(skb);
1214 const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
1215
1216 /* SNMP replies and originating SNMP traps get mangled */
1217 if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
1218 return NF_ACCEPT;
1219 if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
1220 return NF_ACCEPT;
1221
1222 /* No NAT? */
1223 if (!(ct->status & IPS_NAT_MASK))
1224 return NF_ACCEPT;
1225
1226 /*
1227 * Make sure the packet length is ok. So far, we were only guaranteed
1228 * to have a valid length IP header plus 8 bytes, which means we have
1229 * enough room for a UDP header. Just verify the UDP length field so we
1230 * can mess around with the payload.
1231 */
1232 if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
1233 net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
1234 &iph->saddr, &iph->daddr);
1235 return NF_DROP;
1236 }
1237
1238 if (!skb_make_writable(skb, skb->len))
1239 return NF_DROP;
1240
1241 spin_lock_bh(&snmp_lock);
1242 ret = snmp_translate(ct, ctinfo, skb);
1243 spin_unlock_bh(&snmp_lock);
1244 return ret;
1245}
1246
1247static const struct nf_conntrack_expect_policy snmp_exp_policy = {
1248 .max_expected = 0,
1249 .timeout = 180,
1250};
1251
1252static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
1253 .me = THIS_MODULE,
1254 .help = help,
1255 .expect_policy = &snmp_exp_policy,
1256 .name = "snmp_trap",
1257 .tuple.src.l3num = AF_INET,
1258 .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
1259 .tuple.dst.protonum = IPPROTO_UDP,
1260};
1261
1262/*****************************************************************************
1263 *
1264 * Module stuff.
1265 *
1266 *****************************************************************************/
1267
1268static int __init nf_nat_snmp_basic_init(void)
1269{
1270 BUG_ON(nf_nat_snmp_hook != NULL);
1271 RCU_INIT_POINTER(nf_nat_snmp_hook, help);
1272
1273 return nf_conntrack_helper_register(&snmp_trap_helper);
1274}
1275
1276static void __exit nf_nat_snmp_basic_fini(void)
1277{
1278 RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
1279 synchronize_rcu();
1280 nf_conntrack_helper_unregister(&snmp_trap_helper);
1281}
1282
1283module_init(nf_nat_snmp_basic_init);
1284module_exit(nf_nat_snmp_basic_fini);
1285
1286module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
new file mode 100644
index 000000000000..b6e277093e7e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -0,0 +1,235 @@
1/*
2 * nf_nat_snmp_basic.c
3 *
4 * Basic SNMP Application Layer Gateway
5 *
6 * This IP NAT module is intended for use with SNMP network
7 * discovery and monitoring applications where target networks use
8 * conflicting private address realms.
9 *
10 * Static NAT is used to remap the networks from the view of the network
11 * management system at the IP layer, and this module remaps some application
12 * layer addresses to match.
13 *
14 * The simplest form of ALG is performed, where only tagged IP addresses
15 * are modified. The module does not need to be MIB aware and only scans
16 * messages at the ASN.1/BER level.
17 *
18 * Currently, only SNMPv1 and SNMPv2c are supported.
19 *
20 * More information on ALG and associated issues can be found in
21 * RFC 2962.
22 *
23 * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
24 * McLean & Jochen Friedrich, stripped down for use in the kernel.
25 *
26 * Copyright (c) 2000 RP Internet (www.rpi.net.au).
27 *
28 * This program is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published by
30 * the Free Software Foundation; either version 2 of the License, or
31 * (at your option) any later version.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 * You should have received a copy of the GNU General Public License
37 * along with this program; if not, see <http://www.gnu.org/licenses/>.
38 *
39 * Author: James Morris <jmorris@intercode.com.au>
40 *
41 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
42 */
43#include <linux/module.h>
44#include <linux/moduleparam.h>
45#include <linux/types.h>
46#include <linux/kernel.h>
47#include <linux/in.h>
48#include <linux/ip.h>
49#include <linux/udp.h>
50#include <net/checksum.h>
51#include <net/udp.h>
52
53#include <net/netfilter/nf_nat.h>
54#include <net/netfilter/nf_conntrack_expect.h>
55#include <net/netfilter/nf_conntrack_helper.h>
56#include <linux/netfilter/nf_conntrack_snmp.h>
57#include "nf_nat_snmp_basic-asn1.h"
58
59MODULE_LICENSE("GPL");
60MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
61MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
62MODULE_ALIAS("ip_nat_snmp_basic");
63
64#define SNMP_PORT 161
65#define SNMP_TRAP_PORT 162
66
67static DEFINE_SPINLOCK(snmp_lock);
68
69struct snmp_ctx {
70 unsigned char *begin;
71 __sum16 *check;
72 __be32 from;
73 __be32 to;
74};
75
76static void fast_csum(struct snmp_ctx *ctx, unsigned char offset)
77{
78 unsigned char s[12] = {0,};
79 int size;
80
81 if (offset & 1) {
82 memcpy(&s[1], &ctx->from, 4);
83 memcpy(&s[7], &ctx->to, 4);
84 s[0] = ~0;
85 s[1] = ~s[1];
86 s[2] = ~s[2];
87 s[3] = ~s[3];
88 s[4] = ~s[4];
89 s[5] = ~0;
90 size = 12;
91 } else {
92 memcpy(&s[0], &ctx->from, 4);
93 memcpy(&s[4], &ctx->to, 4);
94 s[0] = ~s[0];
95 s[1] = ~s[1];
96 s[2] = ~s[2];
97 s[3] = ~s[3];
98 size = 8;
99 }
100 *ctx->check = csum_fold(csum_partial(s, size,
101 ~csum_unfold(*ctx->check)));
102}
103
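Why 8 bytes versus 12 (a sketch of the reasoning, not authoritative): the rewritten helper replaces a whole 4-byte address at once rather than a single octet.

/* Even offset: old and new addresses each start on a 16-bit word
 * boundary, so ~old (4 bytes) followed by new (4 bytes) needs 8 bytes.
 * Odd offset: every address byte straddles a word boundary, so each copy
 * is padded at both ends -- ~0 around the complemented old value, 0
 * around the new one -- giving two 6-byte runs, 12 bytes total, in the
 * word positions csum_partial() expects. */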
104int snmp_version(void *context, size_t hdrlen, unsigned char tag,
105 const void *data, size_t datalen)
106{
107 if (*(unsigned char *)data > 1)
108 return -ENOTSUPP;
109 return 1;
110}
111
112int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
113 const void *data, size_t datalen)
114{
115 struct snmp_ctx *ctx = (struct snmp_ctx *)context;
116 __be32 *pdata = (__be32 *)data;
117
118 if (*pdata == ctx->from) {
119 pr_debug("%s: %pI4 to %pI4\n", __func__,
120 (void *)&ctx->from, (void *)&ctx->to);
121
122 if (*ctx->check)
123 fast_csum(ctx, (unsigned char *)data - ctx->begin);
124 *pdata = ctx->to;
125 }
126
127 return 1;
128}
129
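How the callbacks are wired up (partly inferred: the grammar file itself is not shown in this diff): the kernel's ASN.1 compiler turns a BER grammar into bytecode plus the nf_nat_snmp_basic_decoder descriptor declared in the generated header, with snmp_version() and snmp_helper() as the actions the grammar attaches to the version field and to IpAddress values. Decoding then reduces to the single call made in snmp_translate() below.

/* Sketch of the call (context fields abbreviated):
 *   struct snmp_ctx ctx = { .begin = data, .check = &udph->check, ... };
 *   int err = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx,
 *                              data, datalen);
 * A negative return -- including -ENOTSUPP from snmp_version() for
 * anything past SNMPv2c -- makes snmp_translate() drop the packet. */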
130static int snmp_translate(struct nf_conn *ct, int dir, struct sk_buff *skb)
131{
132 struct iphdr *iph = ip_hdr(skb);
133 struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
134 u16 datalen = ntohs(udph->len) - sizeof(struct udphdr);
135 char *data = (unsigned char *)udph + sizeof(struct udphdr);
136 struct snmp_ctx ctx;
137 int ret;
138
139 if (dir == IP_CT_DIR_ORIGINAL) {
140 ctx.from = ct->tuplehash[dir].tuple.src.u3.ip;
141 ctx.to = ct->tuplehash[!dir].tuple.dst.u3.ip;
142 } else {
143 ctx.from = ct->tuplehash[!dir].tuple.src.u3.ip;
144 ctx.to = ct->tuplehash[dir].tuple.dst.u3.ip;
145 }
146
147 if (ctx.from == ctx.to)
148 return NF_ACCEPT;
149
150 ctx.begin = (unsigned char *)udph + sizeof(struct udphdr);
151 ctx.check = &udph->check;
152 ret = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx, data, datalen);
153 if (ret < 0) {
154 nf_ct_helper_log(skb, ct, "parser failed\n");
155 return NF_DROP;
156 }
157
158 return NF_ACCEPT;
159}
160
161/* We don't actually set up expectations, just adjust internal IP
162 * addresses if this is being NATted
163 */
164static int help(struct sk_buff *skb, unsigned int protoff,
165 struct nf_conn *ct,
166 enum ip_conntrack_info ctinfo)
167{
168 int dir = CTINFO2DIR(ctinfo);
169 unsigned int ret;
170 const struct iphdr *iph = ip_hdr(skb);
171 const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
172
173 /* SNMP replies and originating SNMP traps get mangled */
174 if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
175 return NF_ACCEPT;
176 if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
177 return NF_ACCEPT;
178
179 /* No NAT? */
180 if (!(ct->status & IPS_NAT_MASK))
181 return NF_ACCEPT;
182
183 /* Make sure the packet length is ok. So far, we were only guaranteed
184 * to have a valid length IP header plus 8 bytes, which means we have
185 * enough room for a UDP header. Just verify the UDP length field so we
186 * can mess around with the payload.
187 */
188 if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
189 nf_ct_helper_log(skb, ct, "dropping malformed packet\n");
190 return NF_DROP;
191 }
192
193 if (!skb_make_writable(skb, skb->len)) {
194 nf_ct_helper_log(skb, ct, "cannot mangle packet");
195 return NF_DROP;
196 }
197
198 spin_lock_bh(&snmp_lock);
199 ret = snmp_translate(ct, dir, skb);
200 spin_unlock_bh(&snmp_lock);
201 return ret;
202}
203
204static const struct nf_conntrack_expect_policy snmp_exp_policy = {
205 .max_expected = 0,
206 .timeout = 180,
207};
208
209static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
210 .me = THIS_MODULE,
211 .help = help,
212 .expect_policy = &snmp_exp_policy,
213 .name = "snmp_trap",
214 .tuple.src.l3num = AF_INET,
215 .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
216 .tuple.dst.protonum = IPPROTO_UDP,
217};
218
219static int __init nf_nat_snmp_basic_init(void)
220{
221 BUG_ON(nf_nat_snmp_hook != NULL);
222 RCU_INIT_POINTER(nf_nat_snmp_hook, help);
223
224 return nf_conntrack_helper_register(&snmp_trap_helper);
225}
226
227static void __exit nf_nat_snmp_basic_fini(void)
228{
229 RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
230 synchronize_rcu();
231 nf_conntrack_helper_unregister(&snmp_trap_helper);
232}
233
234module_init(nf_nat_snmp_basic_init);
235module_exit(nf_nat_snmp_basic_fini);
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 4bbc273b45e8..036c074736b0 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,51 +21,12 @@ nft_do_chain_arp(void *priv,
21{ 21{
22 struct nft_pktinfo pkt; 22 struct nft_pktinfo pkt;
23 23
24 nft_set_pktinfo_unspec(&pkt, skb, state); 24 nft_set_pktinfo(&pkt, skb, state);
25 nft_set_pktinfo_unspec(&pkt, skb);
25 26
26 return nft_do_chain(&pkt, priv); 27 return nft_do_chain(&pkt, priv);
27} 28}
28 29
29static struct nft_af_info nft_af_arp __read_mostly = {
30 .family = NFPROTO_ARP,
31 .nhooks = NF_ARP_NUMHOOKS,
32 .owner = THIS_MODULE,
33 .nops = 1,
34 .hooks = {
35 [NF_ARP_IN] = nft_do_chain_arp,
36 [NF_ARP_OUT] = nft_do_chain_arp,
37 [NF_ARP_FORWARD] = nft_do_chain_arp,
38 },
39};
40
41static int nf_tables_arp_init_net(struct net *net)
42{
43 net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
44 if (net->nft.arp== NULL)
45 return -ENOMEM;
46
47 memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
48
49 if (nft_register_afinfo(net, net->nft.arp) < 0)
50 goto err;
51
52 return 0;
53err:
54 kfree(net->nft.arp);
55 return -ENOMEM;
56}
57
58static void nf_tables_arp_exit_net(struct net *net)
59{
60 nft_unregister_afinfo(net, net->nft.arp);
61 kfree(net->nft.arp);
62}
63
64static struct pernet_operations nf_tables_arp_net_ops = {
65 .init = nf_tables_arp_init_net,
66 .exit = nf_tables_arp_exit_net,
67};
68
69static const struct nf_chain_type filter_arp = { 30static const struct nf_chain_type filter_arp = {
70 .name = "filter", 31 .name = "filter",
71 .type = NFT_CHAIN_T_DEFAULT, 32 .type = NFT_CHAIN_T_DEFAULT,
@@ -73,26 +34,19 @@ static const struct nf_chain_type filter_arp = {
73 .owner = THIS_MODULE, 34 .owner = THIS_MODULE,
74 .hook_mask = (1 << NF_ARP_IN) | 35 .hook_mask = (1 << NF_ARP_IN) |
75 (1 << NF_ARP_OUT), 36 (1 << NF_ARP_OUT),
37 .hooks = {
38 [NF_ARP_IN] = nft_do_chain_arp,
39 [NF_ARP_OUT] = nft_do_chain_arp,
40 },
76}; 41};
77 42
78static int __init nf_tables_arp_init(void) 43static int __init nf_tables_arp_init(void)
79{ 44{
80 int ret; 45 return nft_register_chain_type(&filter_arp);
81
82 ret = nft_register_chain_type(&filter_arp);
83 if (ret < 0)
84 return ret;
85
86 ret = register_pernet_subsys(&nf_tables_arp_net_ops);
87 if (ret < 0)
88 nft_unregister_chain_type(&filter_arp);
89
90 return ret;
91} 46}
92 47
93static void __exit nf_tables_arp_exit(void) 48static void __exit nf_tables_arp_exit(void)
94{ 49{
95 unregister_pernet_subsys(&nf_tables_arp_net_ops);
96 nft_unregister_chain_type(&filter_arp); 50 nft_unregister_chain_type(&filter_arp);
97} 51}
98 52
@@ -101,4 +55,4 @@ module_exit(nf_tables_arp_exit);
101 55
102MODULE_LICENSE("GPL"); 56MODULE_LICENSE("GPL");
103MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 57MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
104MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ 58MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 2840a29b2e04..96f955496d5f 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -24,69 +24,12 @@ static unsigned int nft_do_chain_ipv4(void *priv,
24{ 24{
25 struct nft_pktinfo pkt; 25 struct nft_pktinfo pkt;
26 26
27 nft_set_pktinfo_ipv4(&pkt, skb, state); 27 nft_set_pktinfo(&pkt, skb, state);
28 nft_set_pktinfo_ipv4(&pkt, skb);
28 29
29 return nft_do_chain(&pkt, priv); 30 return nft_do_chain(&pkt, priv);
30} 31}
31 32
32static unsigned int nft_ipv4_output(void *priv,
33 struct sk_buff *skb,
34 const struct nf_hook_state *state)
35{
36 if (unlikely(skb->len < sizeof(struct iphdr) ||
37 ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
38 if (net_ratelimit())
39 pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
40 "packet\n");
41 return NF_ACCEPT;
42 }
43
44 return nft_do_chain_ipv4(priv, skb, state);
45}
46
47struct nft_af_info nft_af_ipv4 __read_mostly = {
48 .family = NFPROTO_IPV4,
49 .nhooks = NF_INET_NUMHOOKS,
50 .owner = THIS_MODULE,
51 .nops = 1,
52 .hooks = {
53 [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
54 [NF_INET_LOCAL_OUT] = nft_ipv4_output,
55 [NF_INET_FORWARD] = nft_do_chain_ipv4,
56 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
57 [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
58 },
59};
60EXPORT_SYMBOL_GPL(nft_af_ipv4);
61
62static int nf_tables_ipv4_init_net(struct net *net)
63{
64 net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
65 if (net->nft.ipv4 == NULL)
66 return -ENOMEM;
67
68 memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
69
70 if (nft_register_afinfo(net, net->nft.ipv4) < 0)
71 goto err;
72
73 return 0;
74err:
75 kfree(net->nft.ipv4);
76 return -ENOMEM;
77}
78
79static void nf_tables_ipv4_exit_net(struct net *net)
80{
81 nft_unregister_afinfo(net, net->nft.ipv4);
82 kfree(net->nft.ipv4);
83}
84
85static struct pernet_operations nf_tables_ipv4_net_ops = {
86 .init = nf_tables_ipv4_init_net,
87 .exit = nf_tables_ipv4_exit_net,
88};
89
90static const struct nf_chain_type filter_ipv4 = { 33static const struct nf_chain_type filter_ipv4 = {
91 .name = "filter", 34 .name = "filter",
92 .type = NFT_CHAIN_T_DEFAULT, 35 .type = NFT_CHAIN_T_DEFAULT,
@@ -97,26 +40,22 @@ static const struct nf_chain_type filter_ipv4 = {
97 (1 << NF_INET_FORWARD) | 40 (1 << NF_INET_FORWARD) |
98 (1 << NF_INET_PRE_ROUTING) | 41 (1 << NF_INET_PRE_ROUTING) |
99 (1 << NF_INET_POST_ROUTING), 42 (1 << NF_INET_POST_ROUTING),
43 .hooks = {
44 [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
45 [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
46 [NF_INET_FORWARD] = nft_do_chain_ipv4,
47 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
48 [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
49 },
100}; 50};
101 51
102static int __init nf_tables_ipv4_init(void) 52static int __init nf_tables_ipv4_init(void)
103{ 53{
104 int ret; 54 return nft_register_chain_type(&filter_ipv4);
105
106 ret = nft_register_chain_type(&filter_ipv4);
107 if (ret < 0)
108 return ret;
109
110 ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
111 if (ret < 0)
112 nft_unregister_chain_type(&filter_ipv4);
113
114 return ret;
115} 55}
116 56
117static void __exit nf_tables_ipv4_exit(void) 57static void __exit nf_tables_ipv4_exit(void)
118{ 58{
119 unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
120 nft_unregister_chain_type(&filter_ipv4); 59 nft_unregister_chain_type(&filter_ipv4);
121} 60}
122 61
@@ -125,4 +64,4 @@ module_exit(nf_tables_ipv4_exit);
125 64
126MODULE_LICENSE("GPL"); 65MODULE_LICENSE("GPL");
127MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 66MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
128MODULE_ALIAS_NFT_FAMILY(AF_INET); 67MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f5c66a7a4bf2..f2a490981594 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv,
33{ 33{
34 struct nft_pktinfo pkt; 34 struct nft_pktinfo pkt;
35 35
36 nft_set_pktinfo_ipv4(&pkt, skb, state); 36 nft_set_pktinfo(&pkt, skb, state);
37 nft_set_pktinfo_ipv4(&pkt, skb);
37 38
38 return nft_do_chain(&pkt, priv); 39 return nft_do_chain(&pkt, priv);
39} 40}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 30493beb611a..d965c225b9f6 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -33,12 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
33 const struct iphdr *iph; 33 const struct iphdr *iph;
34 int err; 34 int err;
35 35
36 /* root is playing with raw sockets. */ 36 nft_set_pktinfo(&pkt, skb, state);
37 if (skb->len < sizeof(struct iphdr) || 37 nft_set_pktinfo_ipv4(&pkt, skb);
38 ip_hdrlen(skb) < sizeof(struct iphdr))
39 return NF_ACCEPT;
40
41 nft_set_pktinfo_ipv4(&pkt, skb, state);
42 38
43 mark = skb->mark; 39 mark = skb->mark;
44 iph = ip_hdr(skb); 40 iph = ip_hdr(skb);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9f37c4727861..dc5edc8f7564 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -83,7 +83,6 @@ static int sockstat_seq_open(struct inode *inode, struct file *file)
83} 83}
84 84
85static const struct file_operations sockstat_seq_fops = { 85static const struct file_operations sockstat_seq_fops = {
86 .owner = THIS_MODULE,
87 .open = sockstat_seq_open, 86 .open = sockstat_seq_open,
88 .read = seq_read, 87 .read = seq_read,
89 .llseek = seq_lseek, 88 .llseek = seq_lseek,
@@ -467,7 +466,6 @@ static int snmp_seq_open(struct inode *inode, struct file *file)
467} 466}
468 467
469static const struct file_operations snmp_seq_fops = { 468static const struct file_operations snmp_seq_fops = {
470 .owner = THIS_MODULE,
471 .open = snmp_seq_open, 469 .open = snmp_seq_open,
472 .read = seq_read, 470 .read = seq_read,
473 .llseek = seq_lseek, 471 .llseek = seq_lseek,
@@ -515,7 +513,6 @@ static int netstat_seq_open(struct inode *inode, struct file *file)
515} 513}
516 514
517static const struct file_operations netstat_seq_fops = { 515static const struct file_operations netstat_seq_fops = {
518 .owner = THIS_MODULE,
519 .open = netstat_seq_open, 516 .open = netstat_seq_open,
520 .read = seq_read, 517 .read = seq_read,
521 .llseek = seq_lseek, 518 .llseek = seq_lseek,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5e570aa9e43b..9b367fc48d7d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -617,8 +617,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
617 ipc.oif = inet->mc_index; 617 ipc.oif = inet->mc_index;
618 if (!saddr) 618 if (!saddr)
619 saddr = inet->mc_addr; 619 saddr = inet->mc_addr;
620 } else if (!ipc.oif) 620 } else if (!ipc.oif) {
621 ipc.oif = inet->uc_index; 621 ipc.oif = inet->uc_index;
622 } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
623 /* oif is set, packet is to local broadcast and
624 * and uc_index is set. oif is most likely set
625 * by sk_bound_dev_if. If uc_index != oif check if the
626 * oif is an L3 master and uc_index is an L3 slave.
627 * If so, we want to allow the send using the uc_index.
628 */
629 if (ipc.oif != inet->uc_index &&
630 ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
631 inet->uc_index)) {
632 ipc.oif = inet->uc_index;
633 }
634 }
622 635
623 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 636 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
624 RT_SCOPE_UNIVERSE, 637 RT_SCOPE_UNIVERSE,
@@ -977,6 +990,8 @@ struct proto raw_prot = {
977 .hash = raw_hash_sk, 990 .hash = raw_hash_sk,
978 .unhash = raw_unhash_sk, 991 .unhash = raw_unhash_sk,
979 .obj_size = sizeof(struct raw_sock), 992 .obj_size = sizeof(struct raw_sock),
993 .useroffset = offsetof(struct raw_sock, filter),
994 .usersize = sizeof_field(struct raw_sock, filter),
980 .h.raw_hash = &raw_v4_hashinfo, 995 .h.raw_hash = &raw_v4_hashinfo,
981#ifdef CONFIG_COMPAT 996#ifdef CONFIG_COMPAT
982 .compat_setsockopt = compat_raw_setsockopt, 997 .compat_setsockopt = compat_raw_setsockopt,
@@ -1119,7 +1134,6 @@ static int raw_v4_seq_open(struct inode *inode, struct file *file)
1119} 1134}
1120 1135
1121static const struct file_operations raw_seq_fops = { 1136static const struct file_operations raw_seq_fops = {
1122 .owner = THIS_MODULE,
1123 .open = raw_v4_seq_open, 1137 .open = raw_v4_seq_open,
1124 .read = seq_read, 1138 .read = seq_read,
1125 .llseek = seq_lseek, 1139 .llseek = seq_lseek,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4e153b23bcec..49cc1c1df1ba 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,7 +240,6 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file)
240} 240}
241 241
242static const struct file_operations rt_cache_seq_fops = { 242static const struct file_operations rt_cache_seq_fops = {
243 .owner = THIS_MODULE,
244 .open = rt_cache_seq_open, 243 .open = rt_cache_seq_open,
245 .read = seq_read, 244 .read = seq_read,
246 .llseek = seq_lseek, 245 .llseek = seq_lseek,
@@ -331,7 +330,6 @@ static int rt_cpu_seq_open(struct inode *inode, struct file *file)
331} 330}
332 331
333static const struct file_operations rt_cpu_seq_fops = { 332static const struct file_operations rt_cpu_seq_fops = {
334 .owner = THIS_MODULE,
335 .open = rt_cpu_seq_open, 333 .open = rt_cpu_seq_open,
336 .read = seq_read, 334 .read = seq_read,
337 .llseek = seq_lseek, 335 .llseek = seq_lseek,
@@ -369,7 +367,6 @@ static int rt_acct_proc_open(struct inode *inode, struct file *file)
369} 367}
370 368
371static const struct file_operations rt_acct_proc_fops = { 369static const struct file_operations rt_acct_proc_fops = {
372 .owner = THIS_MODULE,
373 .open = rt_acct_proc_open, 370 .open = rt_acct_proc_open,
374 .read = seq_read, 371 .read = seq_read,
375 .llseek = seq_lseek, 372 .llseek = seq_lseek,
@@ -1106,7 +1103,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1106 new = true; 1103 new = true;
1107 } 1104 }
1108 1105
1109 __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); 1106 __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
1110 1107
1111 if (!dst_check(&rt->dst, 0)) { 1108 if (!dst_check(&rt->dst, 0)) {
1112 if (new) 1109 if (new)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e053ad7cae2..48636aee23c3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,8 +283,6 @@
283#include <asm/ioctls.h> 283#include <asm/ioctls.h>
284#include <net/busy_poll.h> 284#include <net/busy_poll.h>
285 285
286#include <trace/events/tcp.h>
287
288struct percpu_counter tcp_orphan_count; 286struct percpu_counter tcp_orphan_count;
289EXPORT_SYMBOL_GPL(tcp_orphan_count); 287EXPORT_SYMBOL_GPL(tcp_orphan_count);
290 288
@@ -465,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
465 tcp_mtup_init(sk); 463 tcp_mtup_init(sk);
466 icsk->icsk_af_ops->rebuild_header(sk); 464 icsk->icsk_af_ops->rebuild_header(sk);
467 tcp_init_metrics(sk); 465 tcp_init_metrics(sk);
468 tcp_call_bpf(sk, bpf_op); 466 tcp_call_bpf(sk, bpf_op, 0, NULL);
469 tcp_init_congestion_control(sk); 467 tcp_init_congestion_control(sk);
470 tcp_init_buffer_space(sk); 468 tcp_init_buffer_space(sk);
471} 469}
@@ -493,18 +491,16 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
493 * take care of normal races (between the test and the event) and we don't 491 * take care of normal races (between the test and the event) and we don't
494 * go look at any of the socket buffers directly. 492 * go look at any of the socket buffers directly.
495 */ 493 */
496unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) 494__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
497{ 495{
498 unsigned int mask; 496 __poll_t mask;
499 struct sock *sk = sock->sk; 497 struct sock *sk = sock->sk;
500 const struct tcp_sock *tp = tcp_sk(sk); 498 const struct tcp_sock *tp = tcp_sk(sk);
501 int state; 499 int state;
502 500
503 sock_rps_record_flow(sk);
504
505 sock_poll_wait(file, sk_sleep(sk), wait); 501 sock_poll_wait(file, sk_sleep(sk), wait);
506 502
507 state = sk_state_load(sk); 503 state = inet_sk_state_load(sk);
508 if (state == TCP_LISTEN) 504 if (state == TCP_LISTEN)
509 return inet_csk_listen_poll(sk); 505 return inet_csk_listen_poll(sk);
510 506
@@ -516,36 +512,36 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
516 mask = 0; 512 mask = 0;
517 513
518 /* 514 /*
519 * POLLHUP is certainly not done right. But poll() doesn't 515 * EPOLLHUP is certainly not done right. But poll() doesn't
520 * have a notion of HUP in just one direction, and for a 516 * have a notion of HUP in just one direction, and for a
521 * socket the read side is more interesting. 517 * socket the read side is more interesting.
522 * 518 *
523 * Some poll() documentation says that POLLHUP is incompatible 519 * Some poll() documentation says that EPOLLHUP is incompatible
524 * with the POLLOUT/POLLWR flags, so somebody should check this 520 * with the EPOLLOUT/POLLWR flags, so somebody should check this
525 * all. But careful, it tends to be safer to return too many 521 * all. But careful, it tends to be safer to return too many
526 * bits than too few, and you can easily break real applications 522 * bits than too few, and you can easily break real applications
527 * if you don't tell them that something has hung up! 523 * if you don't tell them that something has hung up!
528 * 524 *
529 * Check-me. 525 * Check-me.
530 * 526 *
531 * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and 527 * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
532 * our fs/select.c). It means that after we received EOF, 528 * our fs/select.c). It means that after we received EOF,
533 * poll always returns immediately, making impossible poll() on write() 529 * poll always returns immediately, making impossible poll() on write()
534 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP 530 * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
535 * if and only if shutdown has been made in both directions. 531 * if and only if shutdown has been made in both directions.
536 * Actually, it is interesting to look how Solaris and DUX 532 * Actually, it is interesting to look how Solaris and DUX
537 * solve this dilemma. I would prefer, if POLLHUP were maskable, 533 * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
538 * then we could set it on SND_SHUTDOWN. BTW examples given 534 * then we could set it on SND_SHUTDOWN. BTW examples given
539 * in Stevens' books assume exactly this behaviour, it explains 535 * in Stevens' books assume exactly this behaviour, it explains
540 * why POLLHUP is incompatible with POLLOUT. --ANK 536 * why EPOLLHUP is incompatible with EPOLLOUT. --ANK
541 * 537 *
542 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent 538 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
543 * blocking on fresh not-connected or disconnected socket. --ANK 539 * blocking on fresh not-connected or disconnected socket. --ANK
544 */ 540 */
545 if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) 541 if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
546 mask |= POLLHUP; 542 mask |= EPOLLHUP;
547 if (sk->sk_shutdown & RCV_SHUTDOWN) 543 if (sk->sk_shutdown & RCV_SHUTDOWN)
548 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 544 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
549 545
550 /* Connected or passive Fast Open socket? */ 546 /* Connected or passive Fast Open socket? */
551 if (state != TCP_SYN_SENT && 547 if (state != TCP_SYN_SENT &&
@@ -558,11 +554,11 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
558 target++; 554 target++;
559 555
560 if (tp->rcv_nxt - tp->copied_seq >= target) 556 if (tp->rcv_nxt - tp->copied_seq >= target)
561 mask |= POLLIN | POLLRDNORM; 557 mask |= EPOLLIN | EPOLLRDNORM;
562 558
563 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 559 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
564 if (sk_stream_is_writeable(sk)) { 560 if (sk_stream_is_writeable(sk)) {
565 mask |= POLLOUT | POLLWRNORM; 561 mask |= EPOLLOUT | EPOLLWRNORM;
566 } else { /* send SIGIO later */ 562 } else { /* send SIGIO later */
567 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 563 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
568 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 564 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -574,24 +570,24 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
574 */ 570 */
575 smp_mb__after_atomic(); 571 smp_mb__after_atomic();
576 if (sk_stream_is_writeable(sk)) 572 if (sk_stream_is_writeable(sk))
577 mask |= POLLOUT | POLLWRNORM; 573 mask |= EPOLLOUT | EPOLLWRNORM;
578 } 574 }
579 } else 575 } else
580 mask |= POLLOUT | POLLWRNORM; 576 mask |= EPOLLOUT | EPOLLWRNORM;
581 577
582 if (tp->urg_data & TCP_URG_VALID) 578 if (tp->urg_data & TCP_URG_VALID)
583 mask |= POLLPRI; 579 mask |= EPOLLPRI;
584 } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { 580 } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
585 /* Active TCP fastopen socket with defer_connect 581 /* Active TCP fastopen socket with defer_connect
586 * Return POLLOUT so application can call write() 582 * Return EPOLLOUT so application can call write()
587 * in order for kernel to generate SYN+data 583 * in order for kernel to generate SYN+data
588 */ 584 */
589 mask |= POLLOUT | POLLWRNORM; 585 mask |= EPOLLOUT | EPOLLWRNORM;
590 } 586 }
591 /* This barrier is coupled with smp_wmb() in tcp_reset() */ 587 /* This barrier is coupled with smp_wmb() in tcp_reset() */
592 smp_rmb(); 588 smp_rmb();
593 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 589 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
594 mask |= POLLERR; 590 mask |= EPOLLERR;
595 591
596 return mask; 592 return mask;
597} 593}
@@ -1106,12 +1102,15 @@ static int linear_payload_sz(bool first_skb)
1106 return 0; 1102 return 0;
1107} 1103}
1108 1104
1109static int select_size(const struct sock *sk, bool sg, bool first_skb) 1105static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
1110{ 1106{
1111 const struct tcp_sock *tp = tcp_sk(sk); 1107 const struct tcp_sock *tp = tcp_sk(sk);
1112 int tmp = tp->mss_cache; 1108 int tmp = tp->mss_cache;
1113 1109
1114 if (sg) { 1110 if (sg) {
1111 if (zc)
1112 return 0;
1113
1115 if (sk_can_gso(sk)) { 1114 if (sk_can_gso(sk)) {
1116 tmp = linear_payload_sz(first_skb); 1115 tmp = linear_payload_sz(first_skb);
1117 } else { 1116 } else {
@@ -1188,7 +1187,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
1188 int flags, err, copied = 0; 1187 int flags, err, copied = 0;
1189 int mss_now = 0, size_goal, copied_syn = 0; 1188 int mss_now = 0, size_goal, copied_syn = 0;
1190 bool process_backlog = false; 1189 bool process_backlog = false;
1191 bool sg; 1190 bool sg, zc = false;
1192 long timeo; 1191 long timeo;
1193 1192
1194 flags = msg->msg_flags; 1193 flags = msg->msg_flags;
@@ -1206,7 +1205,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
1206 goto out_err; 1205 goto out_err;
1207 } 1206 }
1208 1207
1209 if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) 1208 zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
1209 if (!zc)
1210 uarg->zerocopy = 0; 1210 uarg->zerocopy = 0;
1211 } 1211 }
1212 1212
@@ -1283,6 +1283,7 @@ restart:
1283 1283
1284 if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { 1284 if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
1285 bool first_skb; 1285 bool first_skb;
1286 int linear;
1286 1287
1287new_segment: 1288new_segment:
1288 /* Allocate new segment. If the interface is SG, 1289 /* Allocate new segment. If the interface is SG,
@@ -1296,9 +1297,8 @@ new_segment:
1296 goto restart; 1297 goto restart;
1297 } 1298 }
1298 first_skb = tcp_rtx_and_write_queues_empty(sk); 1299 first_skb = tcp_rtx_and_write_queues_empty(sk);
1299 skb = sk_stream_alloc_skb(sk, 1300 linear = select_size(sk, sg, first_skb, zc);
1300 select_size(sk, sg, first_skb), 1301 skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
1301 sk->sk_allocation,
1302 first_skb); 1302 first_skb);
1303 if (!skb) 1303 if (!skb)
1304 goto wait_for_memory; 1304 goto wait_for_memory;
@@ -1327,13 +1327,13 @@ new_segment:
1327 copy = msg_data_left(msg); 1327 copy = msg_data_left(msg);
1328 1328
1329 /* Where to copy to? */ 1329 /* Where to copy to? */
1330 if (skb_availroom(skb) > 0) { 1330 if (skb_availroom(skb) > 0 && !zc) {
1331 /* We have some space in skb head. Superb! */ 1331 /* We have some space in skb head. Superb! */
1332 copy = min_t(int, copy, skb_availroom(skb)); 1332 copy = min_t(int, copy, skb_availroom(skb));
1333 err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); 1333 err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
1334 if (err) 1334 if (err)
1335 goto do_fault; 1335 goto do_fault;
1336 } else if (!uarg || !uarg->zerocopy) { 1336 } else if (!zc) {
1337 bool merge = true; 1337 bool merge = true;
1338 int i = skb_shinfo(skb)->nr_frags; 1338 int i = skb_shinfo(skb)->nr_frags;
1339 struct page_frag *pfrag = sk_page_frag(sk); 1339 struct page_frag *pfrag = sk_page_frag(sk);
@@ -1373,8 +1373,10 @@ new_segment:
1373 pfrag->offset += copy; 1373 pfrag->offset += copy;
1374 } else { 1374 } else {
1375 err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); 1375 err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
1376 if (err == -EMSGSIZE || err == -EEXIST) 1376 if (err == -EMSGSIZE || err == -EEXIST) {
1377 tcp_mark_push(tp, skb);
1377 goto new_segment; 1378 goto new_segment;
1379 }
1378 if (err < 0) 1380 if (err < 0)
1379 goto do_error; 1381 goto do_error;
1380 copy = err; 1382 copy = err;
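
The zc flag threads one decision — can this socket do checksum offload plus scatter/gather, and hence true zerocopy — through select_size() and the copy paths, instead of re-testing uarg->zerocopy at each site; the tcp_mark_push() fix also makes sure a segment abandoned by the zerocopy path is pushed out. On the userspace side this serves the MSG_ZEROCOPY interface; a minimal sender sketch (completion handling omitted for brevity):

    #include <errno.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef SO_ZEROCOPY
    #define SO_ZEROCOPY 60              /* value from <asm-generic/socket.h> */
    #endif
    #ifndef MSG_ZEROCOPY
    #define MSG_ZEROCOPY 0x4000000
    #endif

    /* Sketch: opt a connected TCP socket into zerocopy, then send one
     * buffer without copying it into kernel memory. A real sender must
     * keep buf stable and drain MSG_ERRQUEUE for completion notices.
     */
    static ssize_t send_zerocopy(int fd, const void *buf, size_t len)
    {
            int one = 1;

            if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
                    return -errno;
            return send(fd, buf, len, MSG_ZEROCOPY);
    }

Completions arrive as SO_EE_ORIGIN_ZEROCOPY messages on the socket's error queue; until one is read, the pages handed to send() must not be reused.
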
@@ -1731,8 +1733,8 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb,
1731} 1733}
1732 1734
1733/* Similar to __sock_recv_timestamp, but does not require an skb */ 1735/* Similar to __sock_recv_timestamp, but does not require an skb */
1734void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, 1736static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
1735 struct scm_timestamping *tss) 1737 struct scm_timestamping *tss)
1736{ 1738{
1737 struct timeval tv; 1739 struct timeval tv;
1738 bool has_timestamping = false; 1740 bool has_timestamping = false;
@@ -2040,7 +2042,29 @@ void tcp_set_state(struct sock *sk, int state)
2040{ 2042{
2041 int oldstate = sk->sk_state; 2043 int oldstate = sk->sk_state;
2042 2044
2043 trace_tcp_set_state(sk, oldstate, state); 2045 /* We defined a new enum for TCP states that are exported in BPF
 2046 * so as not to force the internal TCP states to be frozen. The
2047 * following checks will detect if an internal state value ever
2048 * differs from the BPF value. If this ever happens, then we will
2049 * need to remap the internal value to the BPF value before calling
2050 * tcp_call_bpf_2arg.
2051 */
2052 BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
2053 BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
2054 BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
2055 BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
2056 BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
2057 BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
2058 BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
2059 BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
2060 BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
2061 BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
2062 BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
2063 BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
2064 BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
2065
2066 if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
2067 tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
2044 2068
2045 switch (state) { 2069 switch (state) {
2046 case TCP_ESTABLISHED: 2070 case TCP_ESTABLISHED:
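
The BUILD_BUG_ON block pins the UAPI BPF_TCP_* enum to the kernel's internal TCP_* values, so sockops programs can test states without a remapping layer, and tcp_call_bpf_2arg() reports every transition to programs that opted in. A sketch of the consuming BPF program, assuming libbpf-style headers (the original series shipped equivalents under samples/bpf):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>        /* assumption: modern libbpf layout */

    /* Sketch: watch transitions into TCP_CLOSE. For STATE_CB, args[0]
     * is the old state and args[1] the new one. The callback only
     * fires once the program has set BPF_SOCK_OPS_STATE_CB_FLAG.
     */
    SEC("sockops")
    int watch_state(struct bpf_sock_ops *skops)
    {
            switch (skops->op) {
            case BPF_SOCK_OPS_TCP_CONNECT_CB:
                    bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
                    break;
            case BPF_SOCK_OPS_STATE_CB:
                    if (skops->args[1] == BPF_TCP_CLOSE) {
                            /* e.g. bump a per-cgroup counter in a map */
                    }
                    break;
            }
            return 1;
    }

    char _license[] SEC("license") = "GPL";
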
@@ -2065,7 +2089,7 @@ void tcp_set_state(struct sock *sk, int state)
2065 /* Change state AFTER socket is unhashed to avoid closed 2089 /* Change state AFTER socket is unhashed to avoid closed
2066 * socket sitting in hash tables. 2090 * socket sitting in hash tables.
2067 */ 2091 */
2068 sk_state_store(sk, state); 2092 inet_sk_state_store(sk, state);
2069 2093
2070#ifdef STATE_TRACE 2094#ifdef STATE_TRACE
2071 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); 2095 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
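
sk_state_store() becomes inet_sk_state_store(), which keeps the release-store that lockless readers pair with an acquire-load and, in this series, also fires the inet_sock_set_state tracepoint — which is why the open-coded trace_tcp_set_state() call above could be dropped. The ordering contract, sketched:

    #include <net/sock.h>

    /* Sketch of the contract behind inet_sk_state_store() and
     * inet_sk_state_load(): a release store paired with an acquire
     * load, so lockless readers (tcp_diag, /proc/net/tcp) never
     * observe a state ahead of the writes that precede it.
     */
    static inline void example_state_store(struct sock *sk, int newstate)
    {
            smp_store_release(&sk->sk_state, newstate);
    }

    static inline int example_state_load(const struct sock *sk)
    {
            return smp_load_acquire(&sk->sk_state);
    }
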
@@ -2434,6 +2458,12 @@ int tcp_disconnect(struct sock *sk, int flags)
2434 2458
2435 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); 2459 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2436 2460
2461 if (sk->sk_frag.page) {
2462 put_page(sk->sk_frag.page);
2463 sk->sk_frag.page = NULL;
2464 sk->sk_frag.offset = 0;
2465 }
2466
2437 sk->sk_error_report(sk); 2467 sk->sk_error_report(sk);
2438 return err; 2468 return err;
2439} 2469}
@@ -2923,7 +2953,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2923 if (sk->sk_type != SOCK_STREAM) 2953 if (sk->sk_type != SOCK_STREAM)
2924 return; 2954 return;
2925 2955
2926 info->tcpi_state = sk_state_load(sk); 2956 info->tcpi_state = inet_sk_state_load(sk);
2927 2957
2928 /* Report meaningful fields for all TCP states, including listeners */ 2958 /* Report meaningful fields for all TCP states, including listeners */
2929 rate = READ_ONCE(sk->sk_pacing_rate); 2959 rate = READ_ONCE(sk->sk_pacing_rate);
@@ -3581,6 +3611,9 @@ void __init tcp_init(void)
3581 percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); 3611 percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
3582 percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); 3612 percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
3583 inet_hashinfo_init(&tcp_hashinfo); 3613 inet_hashinfo_init(&tcp_hashinfo);
3614 inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
 3615 thash_entries, 21, /* one slot per 2 MB */
3616 0, 64 * 1024);
3584 tcp_hashinfo.bind_bucket_cachep = 3617 tcp_hashinfo.bind_bucket_cachep =
3585 kmem_cache_create("tcp_bind_bucket", 3618 kmem_cache_create("tcp_bind_bucket",
3586 sizeof(struct inet_bind_bucket), 0, 3619 sizeof(struct inet_bind_bucket), 0,
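
The inet_hashinfo2_init() call sizes a second listener hash keyed by (port, local address), so a lookup on a busy port no longer walks every listener bound to it. The scale argument 21 asks for roughly one bucket per 2^21 bytes of memory, capped at 64K buckets; a back-of-the-envelope sketch of that sizing (the real work happens in alloc_large_system_hash(), whose details are assumed here):

    #include <linux/log2.h>

    /* Sketch: approximate bucket count for a given scale. With
     * scale = 21 ("one slot per 2 MB") and 16 GB of memory:
     * (16 << 30) >> 21 = 8192 buckets, well below the 64K cap.
     */
    static unsigned long approx_buckets(unsigned long mem_bytes,
                                        unsigned int scale,
                                        unsigned long high_limit)
    {
            unsigned long n = mem_bytes >> scale;

            if (high_limit && n > high_limit)
                    n = high_limit;
            return roundup_pow_of_two(n);
    }
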
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 8322f26e770e..a471f696e13c 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -481,7 +481,8 @@ static void bbr_advance_cycle_phase(struct sock *sk)
481 481
482 bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); 482 bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
483 bbr->cycle_mstamp = tp->delivered_mstamp; 483 bbr->cycle_mstamp = tp->delivered_mstamp;
484 bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; 484 bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
485 bbr_pacing_gain[bbr->cycle_idx];
485} 486}
486 487
487/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ 488/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -490,8 +491,7 @@ static void bbr_update_cycle_phase(struct sock *sk,
490{ 491{
491 struct bbr *bbr = inet_csk_ca(sk); 492 struct bbr *bbr = inet_csk_ca(sk);
492 493
493 if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && 494 if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
494 bbr_is_next_cycle_phase(sk, rs))
495 bbr_advance_cycle_phase(sk); 495 bbr_advance_cycle_phase(sk);
496} 496}
497 497
@@ -766,7 +766,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
766 filter_expired = after(tcp_jiffies32, 766 filter_expired = after(tcp_jiffies32,
767 bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); 767 bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
768 if (rs->rtt_us >= 0 && 768 if (rs->rtt_us >= 0 &&
769 (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { 769 (rs->rtt_us <= bbr->min_rtt_us ||
770 (filter_expired && !rs->is_ack_delayed))) {
770 bbr->min_rtt_us = rs->rtt_us; 771 bbr->min_rtt_us = rs->rtt_us;
771 bbr->min_rtt_stamp = tcp_jiffies32; 772 bbr->min_rtt_stamp = tcp_jiffies32;
772 } 773 }
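
Two behavioral fixes land in tcp_bbr.c: during long-term bandwidth mode the gain cycle keeps advancing while pacing_gain stays pinned at BBR_UNIT, and the min-RTT filter no longer takes a sample on expiry when the ACK that produced it looks delayed (see the FLAG_ACK_MAYBE_DELAYED plumbing in tcp_input.c below). The resulting acceptance rule, condensed into one predicate straight from the hunk:

    #include <linux/types.h>

    /* Sketch of bbr_update_min_rtt()'s sample-acceptance rule after
     * this change: take a new minimum immediately; on filter expiry,
     * take the sample only if the ACK is not suspected delayed.
     */
    static bool accept_min_rtt_sample(long rtt_us, u32 min_rtt_us,
                                      bool filter_expired, bool is_ack_delayed)
    {
            return rtt_us >= 0 &&
                   (rtt_us <= min_rtt_us ||
                    (filter_expired && !is_ack_delayed));
    }
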
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index abbf0edcf6c2..81148f7a2323 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -24,7 +24,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
24{ 24{
25 struct tcp_info *info = _info; 25 struct tcp_info *info = _info;
26 26
27 if (sk_state_load(sk) == TCP_LISTEN) { 27 if (inet_sk_state_load(sk) == TCP_LISTEN) {
28 r->idiag_rqueue = sk->sk_ack_backlog; 28 r->idiag_rqueue = sk->sk_ack_backlog;
29 r->idiag_wqueue = sk->sk_max_ack_backlog; 29 r->idiag_wqueue = sk->sk_max_ack_backlog;
30 } else if (sk->sk_type == SOCK_STREAM) { 30 } else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 78c192ee03a4..018a48477355 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -379,18 +379,9 @@ fastopen:
379bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, 379bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
380 struct tcp_fastopen_cookie *cookie) 380 struct tcp_fastopen_cookie *cookie)
381{ 381{
382 unsigned long last_syn_loss = 0;
383 const struct dst_entry *dst; 382 const struct dst_entry *dst;
384 int syn_loss = 0;
385 383
386 tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); 384 tcp_fastopen_cache_get(sk, mss, cookie);
387
388 /* Recurring FO SYN losses: no cookie or data in SYN */
389 if (syn_loss > 1 &&
390 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
391 cookie->len = -1;
392 return false;
393 }
394 385
395 /* Firewall blackhole issue check */ 386 /* Firewall blackhole issue check */
396 if (tcp_fastopen_active_should_disable(sk)) { 387 if (tcp_fastopen_active_should_disable(sk)) {
@@ -448,6 +439,8 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
448 * following circumstances: 439 * following circumstances:
449 * 1. client side TFO socket receives out of order FIN 440 * 1. client side TFO socket receives out of order FIN
450 * 2. client side TFO socket receives out of order RST 441 * 2. client side TFO socket receives out of order RST
442 * 3. client side TFO socket has timed out three times consecutively during
443 * or after handshake
451 * We disable active side TFO globally for 1hr at first. Then if it 444 * We disable active side TFO globally for 1hr at first. Then if it
452 * happens again, we disable it for 2h, then 4h, 8h, ... 445 * happens again, we disable it for 2h, then 4h, 8h, ...
453 * And we reset the timeout back to 1hr when we see a successful active 446 * And we reset the timeout back to 1hr when we see a successful active
@@ -524,3 +517,20 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
524 dst_release(dst); 517 dst_release(dst);
525 } 518 }
526} 519}
520
521void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired)
522{
523 u32 timeouts = inet_csk(sk)->icsk_retransmits;
524 struct tcp_sock *tp = tcp_sk(sk);
525
 526 /* Broken middle-boxes may black-hole Fast Open connections during or
527 * even after the handshake. Be extremely conservative and pause
528 * Fast Open globally after hitting the third consecutive timeout or
529 * exceeding the configured timeout limit.
530 */
531 if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) &&
532 (timeouts == 2 || (timeouts < 2 && expired))) {
533 tcp_fastopen_active_disable(sk);
534 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
535 }
536}
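
tcp_fastopen_active_detect_blackhole() centralizes what tcp_write_timeout() used to open-code (see the tcp_timer.c hunk below): pause active TFO after the third consecutive SYN/data timeout, or earlier once the retransmission deadline has expired. Per the comment block above, each trigger doubles the disable window (1h, 2h, 4h, ...); a sketch of that backoff arithmetic, with illustrative names:

    #include <linux/jiffies.h>

    /* Sketch: compute when TFO may be re-enabled. base_jiffies is the
     * configured one-hour window; times counts consecutive triggers,
     * so the window doubles on each one.
     */
    static unsigned long tfo_disable_until(unsigned long base_jiffies,
                                           unsigned int times)
    {
            return jiffies + (base_jiffies << times);
    }
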
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 45f750e85714..575d3c1fb6e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -97,6 +97,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
97#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 97#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
98#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ 98#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
99#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ 99#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
100#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
100 101
101#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 102#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
102#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 103#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -314,7 +315,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
314 315
315 /* Fast Recovery (RFC 5681 3.2) : 316 /* Fast Recovery (RFC 5681 3.2) :
316 * Cubic needs 1.7 factor, rounded to 2 to include 317 * Cubic needs 1.7 factor, rounded to 2 to include
317 * extra cushion (application might react slowly to POLLOUT) 318 * extra cushion (application might react slowly to EPOLLOUT)
318 */ 319 */
319 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; 320 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
320 sndmem *= nr_segs * per_mss; 321 sndmem *= nr_segs * per_mss;
@@ -578,8 +579,8 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
578void tcp_rcv_space_adjust(struct sock *sk) 579void tcp_rcv_space_adjust(struct sock *sk)
579{ 580{
580 struct tcp_sock *tp = tcp_sk(sk); 581 struct tcp_sock *tp = tcp_sk(sk);
582 u32 copied;
581 int time; 583 int time;
582 int copied;
583 584
584 tcp_mstamp_refresh(tp); 585 tcp_mstamp_refresh(tp);
585 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); 586 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
@@ -602,38 +603,31 @@ void tcp_rcv_space_adjust(struct sock *sk)
602 603
603 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && 604 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
604 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { 605 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
605 int rcvwin, rcvmem, rcvbuf; 606 int rcvmem, rcvbuf;
607 u64 rcvwin, grow;
606 608
607 /* minimal window to cope with packet losses, assuming 609 /* minimal window to cope with packet losses, assuming
608 * steady state. Add some cushion because of small variations. 610 * steady state. Add some cushion because of small variations.
609 */ 611 */
610 rcvwin = (copied << 1) + 16 * tp->advmss; 612 rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
611 613
612 /* If rate increased by 25%, 614 /* Accommodate for sender rate increase (eg. slow start) */
613 * assume slow start, rcvwin = 3 * copied 615 grow = rcvwin * (copied - tp->rcvq_space.space);
614 * If rate increased by 50%, 616 do_div(grow, tp->rcvq_space.space);
615 * assume sender can use 2x growth, rcvwin = 4 * copied 617 rcvwin += (grow << 1);
616 */
617 if (copied >=
618 tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
619 if (copied >=
620 tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
621 rcvwin <<= 1;
622 else
623 rcvwin += (rcvwin >> 1);
624 }
625 618
626 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); 619 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
627 while (tcp_win_from_space(sk, rcvmem) < tp->advmss) 620 while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
628 rcvmem += 128; 621 rcvmem += 128;
629 622
630 rcvbuf = min(rcvwin / tp->advmss * rcvmem, 623 do_div(rcvwin, tp->advmss);
631 sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); 624 rcvbuf = min_t(u64, rcvwin * rcvmem,
625 sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
632 if (rcvbuf > sk->sk_rcvbuf) { 626 if (rcvbuf > sk->sk_rcvbuf) {
633 sk->sk_rcvbuf = rcvbuf; 627 sk->sk_rcvbuf = rcvbuf;
634 628
635 /* Make the window clamp follow along. */ 629 /* Make the window clamp follow along. */
636 tp->window_clamp = rcvwin; 630 tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
637 } 631 }
638 } 632 }
639 tp->rcvq_space.space = copied; 633 tp->rcvq_space.space = copied;
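
The rewritten receive-buffer autotuning replaces the old 25%/50% step function with a proportional growth term, computed in u64 so large windows cannot overflow, and clamps window_clamp to what the resulting buffer can actually hold. A sketch of the arithmetic with a worked example (do_div() is the kernel's 64-by-32 division):

    #include <asm/div64.h>
    #include <linux/types.h>

    /* Sketch of the new rcvwin computation:
     *   rcvwin = 2*copied + 16*advmss
     *   grow   = rcvwin * (copied - space) / space
     *   rcvwin += 2*grow
     * E.g. advmss = 1460, space = 100000, copied = 150000:
     *   rcvwin = 300000 + 23360          = 323360
     *   grow   = 323360 * 50000 / 100000 = 161680
     *   rcvwin = 323360 + 2 * 161680     = 646720
     */
    static u64 drs_rcvwin(u64 copied, u32 space, u32 advmss)
    {
            u64 rcvwin = (copied << 1) + 16 * advmss;
            u64 grow = rcvwin * (copied - space);

            do_div(grow, space);
            return rcvwin + (grow << 1);
    }
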
@@ -2864,11 +2858,18 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2864 *rexmit = REXMIT_LOST; 2858 *rexmit = REXMIT_LOST;
2865} 2859}
2866 2860
2867static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) 2861static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2868{ 2862{
2869 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; 2863 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2870 struct tcp_sock *tp = tcp_sk(sk); 2864 struct tcp_sock *tp = tcp_sk(sk);
2871 2865
2866 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2867 /* If the remote keeps returning delayed ACKs, eventually
2868 * the min filter would pick it up and overestimate the
2869 * prop. delay when it expires. Skip suspected delayed ACKs.
2870 */
2871 return;
2872 }
2872 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, 2873 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2873 rtt_us ? : jiffies_to_usecs(1)); 2874 rtt_us ? : jiffies_to_usecs(1));
2874} 2875}
@@ -2908,7 +2909,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2908 * always taken together with ACK, SACK, or TS-opts. Any negative 2909 * always taken together with ACK, SACK, or TS-opts. Any negative
2909 * values will be skipped with the seq_rtt_us < 0 check above. 2910 * values will be skipped with the seq_rtt_us < 0 check above.
2910 */ 2911 */
2911 tcp_update_rtt_min(sk, ca_rtt_us); 2912 tcp_update_rtt_min(sk, ca_rtt_us, flag);
2912 tcp_rtt_estimator(sk, seq_rtt_us); 2913 tcp_rtt_estimator(sk, seq_rtt_us);
2913 tcp_set_rto(sk); 2914 tcp_set_rto(sk);
2914 2915
@@ -3132,6 +3133,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3132 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { 3133 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3133 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); 3134 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3134 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); 3135 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3136
3137 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3138 last_in_flight && !prior_sacked && fully_acked &&
3139 sack->rate->prior_delivered + 1 == tp->delivered &&
3140 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
3141 /* Conservatively mark a delayed ACK. It's typically
3142 * from a lone runt packet over the round trip to
3143 * a receiver w/o out-of-order or CE events.
3144 */
3145 flag |= FLAG_ACK_MAYBE_DELAYED;
3146 }
3135 } 3147 }
3136 if (sack->first_sackt) { 3148 if (sack->first_sackt) {
3137 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt); 3149 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
@@ -3621,6 +3633,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3621 3633
3622 delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ 3634 delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
3623 lost = tp->lost - lost; /* freshly marked lost */ 3635 lost = tp->lost - lost; /* freshly marked lost */
3636 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3624 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); 3637 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3625 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); 3638 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3626 tcp_xmit_recovery(sk, rexmit); 3639 tcp_xmit_recovery(sk, rexmit);
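
The delayed-ACK detection feeding both the min-RTT filter above and BBR's rate sample (rs.is_ack_delayed) is deliberately conservative: it only fires for a lone runt segment, fully acked, with no SACK or congestion signal on the same ACK. Condensed into a predicate as it would read inside tcp_input.c, where the FLAG_* bits are defined:

    /* Sketch: the conditions tcp_clean_rtx_queue() now checks before
     * setting FLAG_ACK_MAYBE_DELAYED. lone_delivery means exactly one
     * packet was newly delivered per the rate sample.
     */
    static bool ack_maybe_delayed(u32 pkts_acked, u32 last_in_flight,
                                  u32 mss_cache, int prior_sacked,
                                  bool fully_acked, bool lone_delivery,
                                  int flag)
    {
            return pkts_acked == 1 && last_in_flight &&
                   last_in_flight < mss_cache && !prior_sacked &&
                   fully_acked && lone_delivery &&
                   !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED));
    }
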
@@ -5306,6 +5319,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5306 unsigned int len = skb->len; 5319 unsigned int len = skb->len;
5307 struct tcp_sock *tp = tcp_sk(sk); 5320 struct tcp_sock *tp = tcp_sk(sk);
5308 5321
5322 /* TCP congestion window tracking */
5323 trace_tcp_probe(sk, skb);
5324
5309 tcp_mstamp_refresh(tp); 5325 tcp_mstamp_refresh(tp);
5310 if (unlikely(!sk->sk_rx_dst)) 5326 if (unlikely(!sk->sk_rx_dst))
5311 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); 5327 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94e28350f420..f8ad397e285e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -705,7 +705,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
705 */ 705 */
706 if (sk) { 706 if (sk) {
707 arg.bound_dev_if = sk->sk_bound_dev_if; 707 arg.bound_dev_if = sk->sk_bound_dev_if;
708 trace_tcp_send_reset(sk, skb); 708 if (sk_fullsock(sk))
709 trace_tcp_send_reset(sk, skb);
709 } 710 }
710 711
711 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != 712 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
@@ -1911,7 +1912,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
1911 /* Clean up the MD5 key list, if any */ 1912 /* Clean up the MD5 key list, if any */
1912 if (tp->md5sig_info) { 1913 if (tp->md5sig_info) {
1913 tcp_clear_md5_list(sk); 1914 tcp_clear_md5_list(sk);
1914 kfree_rcu(tp->md5sig_info, rcu); 1915 kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
1915 tp->md5sig_info = NULL; 1916 tp->md5sig_info = NULL;
1916 } 1917 }
1917#endif 1918#endif
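
The kfree_rcu() change in tcp_v4_destroy_sock() is a sparse fix, not a behavioral one: md5sig_info is __rcu-annotated, and rcu_dereference_protected(..., 1) strips the annotation while asserting the caller has exclusive access (here, the socket is being torn down). The general pattern, sketched:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            int val;
            struct rcu_head rcu;
    };

    struct foo __rcu *gp;

    /* Sketch: free an __rcu pointer when no concurrent writers can
     * exist. The "1" stands in for a real lockdep condition.
     */
    static void release_gp(void)
    {
            struct foo *p = rcu_dereference_protected(gp, 1);

            if (p) {
                    RCU_INIT_POINTER(gp, NULL);
                    kfree_rcu(p, rcu);
            }
    }
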
@@ -2281,7 +2282,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2281 timer_expires = jiffies; 2282 timer_expires = jiffies;
2282 } 2283 }
2283 2284
2284 state = sk_state_load(sk); 2285 state = inet_sk_state_load(sk);
2285 if (state == TCP_LISTEN) 2286 if (state == TCP_LISTEN)
2286 rx_queue = sk->sk_ack_backlog; 2287 rx_queue = sk->sk_ack_backlog;
2287 else 2288 else
@@ -2358,7 +2359,6 @@ out:
2358} 2359}
2359 2360
2360static const struct file_operations tcp_afinfo_seq_fops = { 2361static const struct file_operations tcp_afinfo_seq_fops = {
2361 .owner = THIS_MODULE,
2362 .open = tcp_seq_open, 2362 .open = tcp_seq_open,
2363 .read = seq_read, 2363 .read = seq_read,
2364 .llseek = seq_lseek, 2364 .llseek = seq_lseek,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7097f92d16e5..03b51cdcc731 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -546,8 +546,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
546static DEFINE_SEQLOCK(fastopen_seqlock); 546static DEFINE_SEQLOCK(fastopen_seqlock);
547 547
548void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, 548void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
549 struct tcp_fastopen_cookie *cookie, 549 struct tcp_fastopen_cookie *cookie)
550 int *syn_loss, unsigned long *last_syn_loss)
551{ 550{
552 struct tcp_metrics_block *tm; 551 struct tcp_metrics_block *tm;
553 552
@@ -564,8 +563,6 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
564 *cookie = tfom->cookie; 563 *cookie = tfom->cookie;
565 if (cookie->len <= 0 && tfom->try_exp == 1) 564 if (cookie->len <= 0 && tfom->try_exp == 1)
566 cookie->exp = true; 565 cookie->exp = true;
567 *syn_loss = tfom->syn_loss;
568 *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
569 } while (read_seqretry(&fastopen_seqlock, seq)); 566 } while (read_seqretry(&fastopen_seqlock, seq));
570 } 567 }
571 rcu_read_unlock(); 568 rcu_read_unlock();
@@ -895,7 +892,7 @@ static void tcp_metrics_flush_all(struct net *net)
895 pp = &hb->chain; 892 pp = &hb->chain;
896 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { 893 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
897 match = net ? net_eq(tm_net(tm), net) : 894 match = net ? net_eq(tm_net(tm), net) :
898 !atomic_read(&tm_net(tm)->count); 895 !refcount_read(&tm_net(tm)->count);
899 if (match) { 896 if (match) {
900 *pp = tm->tcpm_next; 897 *pp = tm->tcpm_next;
901 kfree_rcu(tm, rcu_head); 898 kfree_rcu(tm, rcu_head);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b079b619b60c..a8384b0c11f8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -316,9 +316,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
316 */ 316 */
317 local_bh_disable(); 317 local_bh_disable();
318 inet_twsk_schedule(tw, timeo); 318 inet_twsk_schedule(tw, timeo);
319 /* Linkage updates. */ 319 /* Linkage updates.
320 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); 320 * Note that access to tw after this point is illegal.
321 inet_twsk_put(tw); 321 */
322 inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
322 local_bh_enable(); 323 local_bh_enable();
323 } else { 324 } else {
324 /* Sorry, if we're out of memory, just CLOSE this 325 /* Sorry, if we're out of memory, just CLOSE this
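
Dropping the __ prefix signals a contract change: inet_twsk_hashdance() now consumes the reference the caller held, which is why the separate inet_twsk_put() disappears and why the new comment declares tw off limits afterwards. The calling convention as tcp_time_wait() now follows it, sketched:

    #include <net/inet_timewait_sock.h>
    #include <net/tcp.h>

    /* Sketch: entering timewait after this change. No inet_twsk_put(),
     * and no touching tw once it is hashed -- another CPU may free it
     * immediately.
     */
    static void example_enter_timewait(struct inet_timewait_sock *tw,
                                       struct sock *sk, int timeo)
    {
            local_bh_disable();
            inet_twsk_schedule(tw, timeo);
            inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
            local_bh_enable();
    }
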
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 0b5a05bd82e3..764298e52577 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk)
146 * within a datacenter, where we have reasonable estimates of 146 * within a datacenter, where we have reasonable estimates of
147 * RTTs 147 * RTTs
148 */ 148 */
149 base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT); 149 base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
150 if (base_rtt > 0) { 150 if (base_rtt > 0) {
151 ca->nv_base_rtt = base_rtt; 151 ca->nv_base_rtt = base_rtt;
152 ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */ 152 ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
@@ -364,7 +364,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
364 */ 364 */
365 cwnd_by_slope = (u32) 365 cwnd_by_slope = (u32)
366 div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt, 366 div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt,
367 (u64)(80000 * tp->mss_cache)); 367 80000ULL * tp->mss_cache);
368 max_win = cwnd_by_slope + nv_pad; 368 max_win = cwnd_by_slope + nv_pad;
369 369
370 /* If cwnd > max_win, decrease cwnd 370 /* If cwnd > max_win, decrease cwnd
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a4d214c7b506..e9f985e42405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1944,7 +1944,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1944 1944
1945 in_flight = tcp_packets_in_flight(tp); 1945 in_flight = tcp_packets_in_flight(tp);
1946 1946
1947 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight)); 1947 BUG_ON(tcp_skb_pcount(skb) <= 1);
1948 BUG_ON(tp->snd_cwnd <= in_flight);
1948 1949
1949 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1950 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1950 1951
@@ -2414,15 +2415,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2414 2415
2415 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; 2416 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2416 /* Schedule a loss probe in 2*RTT for SACK capable connections 2417 /* Schedule a loss probe in 2*RTT for SACK capable connections
2417 * in Open state, that are either limited by cwnd or application. 2418 * not in loss recovery, that are either limited by cwnd or application.
2418 */ 2419 */
2419 if ((early_retrans != 3 && early_retrans != 4) || 2420 if ((early_retrans != 3 && early_retrans != 4) ||
2420 !tp->packets_out || !tcp_is_sack(tp) || 2421 !tp->packets_out || !tcp_is_sack(tp) ||
2421 icsk->icsk_ca_state != TCP_CA_Open) 2422 (icsk->icsk_ca_state != TCP_CA_Open &&
2422 return false; 2423 icsk->icsk_ca_state != TCP_CA_CWR))
2423
2424 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2425 !tcp_write_queue_empty(sk))
2426 return false; 2424 return false;
2427 2425
2428 /* Probe timeout is 2*rtt. Add minimum RTO to account 2426 /* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2907,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2907 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2905 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2908 } 2906 }
2909 2907
2908 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
2909 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
2910 TCP_SKB_CB(skb)->seq, segs, err);
2911
2910 if (likely(!err)) { 2912 if (likely(!err)) {
2911 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 2913 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2912 trace_tcp_retransmit_skb(sk, skb); 2914 trace_tcp_retransmit_skb(sk, skb);
@@ -3471,7 +3473,7 @@ int tcp_connect(struct sock *sk)
3471 struct sk_buff *buff; 3473 struct sk_buff *buff;
3472 int err; 3474 int err;
3473 3475
3474 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); 3476 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
3475 3477
3476 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 3478 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
3477 return -EHOSTUNREACH; /* Routing failure or similar. */ 3479 return -EHOSTUNREACH; /* Routing failure or similar. */
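
tcp_call_bpf() grows extra-argument variants, and __tcp_retransmit_skb() now reports every retransmission (sequence, segment count, result) to sockops programs that opted in. A consumer sketch, pairing with the RTO callback added in tcp_timer.c below (libbpf-style headers assumed):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>        /* assumption: modern libbpf layout */

    /* Sketch: opt in to retransmit/RTO callbacks at connect time,
     * then observe them. args[] carries what the kernel passed to
     * tcp_call_bpf_3arg().
     */
    SEC("sockops")
    int watch_loss(struct bpf_sock_ops *skops)
    {
            switch (skops->op) {
            case BPF_SOCK_OPS_TCP_CONNECT_CB:
                    bpf_sock_ops_cb_flags_set(skops,
                                              BPF_SOCK_OPS_RETRANS_CB_FLAG |
                                              BPF_SOCK_OPS_RTO_CB_FLAG);
                    break;
            case BPF_SOCK_OPS_RETRANS_CB:
                    /* args[0] = seq, args[1] = segs, args[2] = err */
                    break;
            case BPF_SOCK_OPS_RTO_CB:
                    /* args[0] = retransmits, args[1] = rto, args[2] = expired */
                    break;
            }
            return 1;
    }

    char _license[] SEC("license") = "GPL";
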
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
deleted file mode 100644
index 697f4c67b2e3..000000000000
--- a/net/ipv4/tcp_probe.c
+++ /dev/null
@@ -1,301 +0,0 @@
1/*
2 * tcpprobe - Observe the TCP flow with kprobes.
3 *
4 * The idea for this came from Werner Almesberger's umlsim
5 * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/kernel.h>
24#include <linux/kprobes.h>
25#include <linux/socket.h>
26#include <linux/tcp.h>
27#include <linux/slab.h>
28#include <linux/proc_fs.h>
29#include <linux/module.h>
30#include <linux/ktime.h>
31#include <linux/time.h>
32#include <net/net_namespace.h>
33
34#include <net/tcp.h>
35
36MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
37MODULE_DESCRIPTION("TCP cwnd snooper");
38MODULE_LICENSE("GPL");
39MODULE_VERSION("1.1");
40
41static int port __read_mostly;
42MODULE_PARM_DESC(port, "Port to match (0=all)");
43module_param(port, int, 0);
44
45static unsigned int bufsize __read_mostly = 4096;
46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
47module_param(bufsize, uint, 0);
48
49static unsigned int fwmark __read_mostly;
50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
51module_param(fwmark, uint, 0);
52
53static int full __read_mostly;
54MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
55module_param(full, int, 0);
56
57static const char procname[] = "tcpprobe";
58
59struct tcp_log {
60 ktime_t tstamp;
61 union {
62 struct sockaddr raw;
63 struct sockaddr_in v4;
64 struct sockaddr_in6 v6;
65 } src, dst;
66 u16 length;
67 u32 snd_nxt;
68 u32 snd_una;
69 u32 snd_wnd;
70 u32 rcv_wnd;
71 u32 snd_cwnd;
72 u32 ssthresh;
73 u32 srtt;
74};
75
76static struct {
77 spinlock_t lock;
78 wait_queue_head_t wait;
79 ktime_t start;
80 u32 lastcwnd;
81
82 unsigned long head, tail;
83 struct tcp_log *log;
84} tcp_probe;
85
86static inline int tcp_probe_used(void)
87{
88 return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1);
89}
90
91static inline int tcp_probe_avail(void)
92{
93 return bufsize - tcp_probe_used() - 1;
94}
95
96#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \
97 do { \
98 si4.sin_family = AF_INET; \
99 si4.sin_port = inet->inet_##mem##port; \
100 si4.sin_addr.s_addr = inet->inet_##mem##addr; \
101 } while (0) \
102
103/*
104 * Hook inserted to be called before each receive packet.
105 * Note: arguments must match tcp_rcv_established()!
106 */
107static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
108 const struct tcphdr *th)
109{
110 unsigned int len = skb->len;
111 const struct tcp_sock *tp = tcp_sk(sk);
112 const struct inet_sock *inet = inet_sk(sk);
113
114 /* Only update if port or skb mark matches */
115 if (((port == 0 && fwmark == 0) ||
116 ntohs(inet->inet_dport) == port ||
117 ntohs(inet->inet_sport) == port ||
118 (fwmark > 0 && skb->mark == fwmark)) &&
119 (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
120
121 spin_lock(&tcp_probe.lock);
122 /* If log fills, just silently drop */
123 if (tcp_probe_avail() > 1) {
124 struct tcp_log *p = tcp_probe.log + tcp_probe.head;
125
126 p->tstamp = ktime_get();
127 switch (sk->sk_family) {
128 case AF_INET:
129 tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
130 tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
131 break;
132 case AF_INET6:
133 memset(&p->src.v6, 0, sizeof(p->src.v6));
134 memset(&p->dst.v6, 0, sizeof(p->dst.v6));
135#if IS_ENABLED(CONFIG_IPV6)
136 p->src.v6.sin6_family = AF_INET6;
137 p->src.v6.sin6_port = inet->inet_sport;
138 p->src.v6.sin6_addr = inet6_sk(sk)->saddr;
139
140 p->dst.v6.sin6_family = AF_INET6;
141 p->dst.v6.sin6_port = inet->inet_dport;
142 p->dst.v6.sin6_addr = sk->sk_v6_daddr;
143#endif
144 break;
145 default:
146 BUG();
147 }
148
149 p->length = len;
150 p->snd_nxt = tp->snd_nxt;
151 p->snd_una = tp->snd_una;
152 p->snd_cwnd = tp->snd_cwnd;
153 p->snd_wnd = tp->snd_wnd;
154 p->rcv_wnd = tp->rcv_wnd;
155 p->ssthresh = tcp_current_ssthresh(sk);
156 p->srtt = tp->srtt_us >> 3;
157
158 tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
159 }
160 tcp_probe.lastcwnd = tp->snd_cwnd;
161 spin_unlock(&tcp_probe.lock);
162
163 wake_up(&tcp_probe.wait);
164 }
165
166 jprobe_return();
167}
168
169static struct jprobe tcp_jprobe = {
170 .kp = {
171 .symbol_name = "tcp_rcv_established",
172 },
173 .entry = jtcp_rcv_established,
174};
175
176static int tcpprobe_open(struct inode *inode, struct file *file)
177{
178 /* Reset (empty) log */
179 spin_lock_bh(&tcp_probe.lock);
180 tcp_probe.head = tcp_probe.tail = 0;
181 tcp_probe.start = ktime_get();
182 spin_unlock_bh(&tcp_probe.lock);
183
184 return 0;
185}
186
187static int tcpprobe_sprint(char *tbuf, int n)
188{
189 const struct tcp_log *p
190 = tcp_probe.log + tcp_probe.tail;
191 struct timespec64 ts
192 = ktime_to_timespec64(ktime_sub(p->tstamp, tcp_probe.start));
193
194 return scnprintf(tbuf, n,
195 "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
196 (unsigned long)ts.tv_sec,
197 (unsigned long)ts.tv_nsec,
198 &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
199 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
200}
201
202static ssize_t tcpprobe_read(struct file *file, char __user *buf,
203 size_t len, loff_t *ppos)
204{
205 int error = 0;
206 size_t cnt = 0;
207
208 if (!buf)
209 return -EINVAL;
210
211 while (cnt < len) {
212 char tbuf[256];
213 int width;
214
215 /* Wait for data in buffer */
216 error = wait_event_interruptible(tcp_probe.wait,
217 tcp_probe_used() > 0);
218 if (error)
219 break;
220
221 spin_lock_bh(&tcp_probe.lock);
222 if (tcp_probe.head == tcp_probe.tail) {
223 /* multiple readers race? */
224 spin_unlock_bh(&tcp_probe.lock);
225 continue;
226 }
227
228 width = tcpprobe_sprint(tbuf, sizeof(tbuf));
229
230 if (cnt + width < len)
231 tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1);
232
233 spin_unlock_bh(&tcp_probe.lock);
234
235 /* if record greater than space available
236 return partial buffer (so far) */
237 if (cnt + width >= len)
238 break;
239
240 if (copy_to_user(buf + cnt, tbuf, width))
241 return -EFAULT;
242 cnt += width;
243 }
244
245 return cnt == 0 ? error : cnt;
246}
247
248static const struct file_operations tcpprobe_fops = {
249 .owner = THIS_MODULE,
250 .open = tcpprobe_open,
251 .read = tcpprobe_read,
252 .llseek = noop_llseek,
253};
254
255static __init int tcpprobe_init(void)
256{
257 int ret = -ENOMEM;
258
259 /* Warning: if the function signature of tcp_rcv_established,
260 * has been changed, you also have to change the signature of
261 * jtcp_rcv_established, otherwise you end up right here!
262 */
263 BUILD_BUG_ON(__same_type(tcp_rcv_established,
264 jtcp_rcv_established) == 0);
265
266 init_waitqueue_head(&tcp_probe.wait);
267 spin_lock_init(&tcp_probe.lock);
268
269 if (bufsize == 0)
270 return -EINVAL;
271
272 bufsize = roundup_pow_of_two(bufsize);
273 tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
274 if (!tcp_probe.log)
275 goto err0;
276
277 if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops))
278 goto err0;
279
280 ret = register_jprobe(&tcp_jprobe);
281 if (ret)
282 goto err1;
283
284 pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
285 port, fwmark, bufsize);
286 return 0;
287 err1:
288 remove_proc_entry(procname, init_net.proc_net);
289 err0:
290 kfree(tcp_probe.log);
291 return ret;
292}
293module_init(tcpprobe_init);
294
295static __exit void tcpprobe_exit(void)
296{
297 remove_proc_entry(procname, init_net.proc_net);
298 unregister_jprobe(&tcp_jprobe);
299 kfree(tcp_probe.log);
300}
301module_exit(tcpprobe_exit);
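
The whole jprobe-based module goes away in favor of the tcp_probe trace event called from tcp_rcv_established() (see the tcp_input.c hunk above); the same data is now reachable through tracefs under events/tcp/tcp_probe, without the fragile signature matching that the BUILD_BUG_ON above had to guard. For in-kernel consumers, a module can attach a probe directly — a sketch, assuming the generated register_trace_tcp_probe() helper is exported to modules:

    #include <linux/module.h>
    #include <trace/events/tcp.h>

    /* Sketch: tracepoint probes take a leading void *data cookie,
     * then the event's own arguments (here: sk and skb).
     */
    static void my_tcp_probe(void *data, struct sock *sk, struct sk_buff *skb)
    {
            /* e.g. sample tcp_sk(sk)->snd_cwnd */
    }

    static int __init probe_init(void)
    {
            return register_trace_tcp_probe(my_tcp_probe, NULL);
    }

    static void __exit probe_exit(void)
    {
            unregister_trace_tcp_probe(my_tcp_probe, NULL);
            tracepoint_synchronize_unregister();
    }

    module_init(probe_init);
    module_exit(probe_exit);
    MODULE_LICENSE("GPL");
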
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 388158c9d9f6..71fc60f1b326 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -198,11 +198,6 @@ static int tcp_write_timeout(struct sock *sk)
198 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 198 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
199 if (icsk->icsk_retransmits) { 199 if (icsk->icsk_retransmits) {
200 dst_negative_advice(sk); 200 dst_negative_advice(sk);
201 if (tp->syn_fastopen || tp->syn_data)
202 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
203 if (tp->syn_data && icsk->icsk_retransmits == 1)
204 NET_INC_STATS(sock_net(sk),
205 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
206 } else if (!tp->syn_data && !tp->syn_fastopen) { 201 } else if (!tp->syn_data && !tp->syn_fastopen) {
207 sk_rethink_txhash(sk); 202 sk_rethink_txhash(sk);
208 } 203 }
@@ -210,17 +205,6 @@ static int tcp_write_timeout(struct sock *sk)
210 expired = icsk->icsk_retransmits >= retry_until; 205 expired = icsk->icsk_retransmits >= retry_until;
211 } else { 206 } else {
212 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { 207 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
213 /* Some middle-boxes may black-hole Fast Open _after_
214 * the handshake. Therefore we conservatively disable
215 * Fast Open on this path on recurring timeouts after
216 * successful Fast Open.
217 */
218 if (tp->syn_data_acked) {
219 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
220 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
221 NET_INC_STATS(sock_net(sk),
222 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
223 }
224 /* Black hole detection */ 208 /* Black hole detection */
225 tcp_mtu_probing(icsk, sk); 209 tcp_mtu_probing(icsk, sk);
226 210
@@ -243,11 +227,19 @@ static int tcp_write_timeout(struct sock *sk)
243 expired = retransmits_timed_out(sk, retry_until, 227 expired = retransmits_timed_out(sk, retry_until,
244 icsk->icsk_user_timeout); 228 icsk->icsk_user_timeout);
245 } 229 }
230 tcp_fastopen_active_detect_blackhole(sk, expired);
231
232 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
233 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
234 icsk->icsk_retransmits,
235 icsk->icsk_rto, (int)expired);
236
246 if (expired) { 237 if (expired) {
247 /* Has it gone just too far? */ 238 /* Has it gone just too far? */
248 tcp_write_err(sk); 239 tcp_write_err(sk);
249 return 1; 240 return 1;
250 } 241 }
242
251 return 0; 243 return 0;
252} 244}
253 245
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 6bb9e14c710a..622caa4039e0 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -29,6 +29,18 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name)
29 return NULL; 29 return NULL;
30} 30}
31 31
32static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp)
33{
34 struct tcp_ulp_ops *e;
35
36 list_for_each_entry_rcu(e, &tcp_ulp_list, list) {
37 if (e->uid == ulp)
38 return e;
39 }
40
41 return NULL;
42}
43
32static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) 44static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
33{ 45{
34 const struct tcp_ulp_ops *ulp = NULL; 46 const struct tcp_ulp_ops *ulp = NULL;
@@ -51,6 +63,18 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
51 return ulp; 63 return ulp;
52} 64}
53 65
66static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid)
67{
68 const struct tcp_ulp_ops *ulp;
69
70 rcu_read_lock();
71 ulp = tcp_ulp_find_id(uid);
72 if (!ulp || !try_module_get(ulp->owner))
73 ulp = NULL;
74 rcu_read_unlock();
75 return ulp;
76}
77
54/* Attach new upper layer protocol to the list 78/* Attach new upper layer protocol to the list
55 * of available protocols. 79 * of available protocols.
56 */ 80 */
@@ -59,13 +83,10 @@ int tcp_register_ulp(struct tcp_ulp_ops *ulp)
59 int ret = 0; 83 int ret = 0;
60 84
61 spin_lock(&tcp_ulp_list_lock); 85 spin_lock(&tcp_ulp_list_lock);
62 if (tcp_ulp_find(ulp->name)) { 86 if (tcp_ulp_find(ulp->name))
63 pr_notice("%s already registered or non-unique name\n",
64 ulp->name);
65 ret = -EEXIST; 87 ret = -EEXIST;
66 } else { 88 else
67 list_add_tail_rcu(&ulp->list, &tcp_ulp_list); 89 list_add_tail_rcu(&ulp->list, &tcp_ulp_list);
68 }
69 spin_unlock(&tcp_ulp_list_lock); 90 spin_unlock(&tcp_ulp_list_lock);
70 91
71 return ret; 92 return ret;
@@ -124,6 +145,34 @@ int tcp_set_ulp(struct sock *sk, const char *name)
124 if (!ulp_ops) 145 if (!ulp_ops)
125 return -ENOENT; 146 return -ENOENT;
126 147
148 if (!ulp_ops->user_visible) {
149 module_put(ulp_ops->owner);
150 return -ENOENT;
151 }
152
153 err = ulp_ops->init(sk);
154 if (err) {
155 module_put(ulp_ops->owner);
156 return err;
157 }
158
159 icsk->icsk_ulp_ops = ulp_ops;
160 return 0;
161}
162
163int tcp_set_ulp_id(struct sock *sk, int ulp)
164{
165 struct inet_connection_sock *icsk = inet_csk(sk);
166 const struct tcp_ulp_ops *ulp_ops;
167 int err;
168
169 if (icsk->icsk_ulp_ops)
170 return -EEXIST;
171
172 ulp_ops = __tcp_ulp_lookup(ulp);
173 if (!ulp_ops)
174 return -ENOENT;
175
127 err = ulp_ops->init(sk); 176 err = ulp_ops->init(sk);
128 if (err) { 177 if (err) {
129 module_put(ulp_ops->owner); 178 module_put(ulp_ops->owner);
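
Besides the by-name lookup used by setsockopt(TCP_ULP), a ULP is now reachable by numeric id via tcp_set_ulp_id(), added for in-kernel users that must bypass the user-visible path. What a registration looks like under the new rules (the name, id value, and init body are illustrative):

    #include <net/tcp.h>

    static int demo_ulp_init(struct sock *sk)
    {
            return 0;       /* attach private state here */
    }

    /* Sketch: user_visible = false makes the ULP invisible to
     * setsockopt(TCP_ULP); only tcp_set_ulp_id(sk, .uid) reaches it.
     */
    static struct tcp_ulp_ops demo_ulp __read_mostly = {
            .name         = "demo",
            .uid          = 99,
            .user_visible = false,
            .owner        = THIS_MODULE,
            .init         = demo_ulp_init,
    };

    /* tcp_register_ulp(&demo_ulp) adds it to tcp_ulp_list. */
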
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e4ff25c947c5..bfaefe560b5c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -357,18 +357,12 @@ fail:
357} 357}
358EXPORT_SYMBOL(udp_lib_get_port); 358EXPORT_SYMBOL(udp_lib_get_port);
359 359
360static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
361 unsigned int port)
362{
363 return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
364}
365
366int udp_v4_get_port(struct sock *sk, unsigned short snum) 360int udp_v4_get_port(struct sock *sk, unsigned short snum)
367{ 361{
368 unsigned int hash2_nulladdr = 362 unsigned int hash2_nulladdr =
369 udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); 363 ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
370 unsigned int hash2_partial = 364 unsigned int hash2_partial =
371 udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); 365 ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
372 366
373 /* precompute partial secondary hash */ 367 /* precompute partial secondary hash */
374 udp_sk(sk)->udp_portaddr_hash = hash2_partial; 368 udp_sk(sk)->udp_portaddr_hash = hash2_partial;
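
udp4_portaddr_hash() moves out of UDP into a shared ipv4_portaddr_hash() helper, so the new TCP listener hash set up in tcp_init() above can use the same function. The computation is unchanged from the removed lines:

    #include <linux/jhash.h>
    #include <net/netns/hash.h>

    /* Same computation as the removed udp4_portaddr_hash(): hash the
     * address with a per-namespace salt, then fold in the port.
     */
    static u32 example_portaddr_hash(const struct net *net, __be32 saddr,
                                     unsigned int port)
    {
            return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
    }
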
@@ -445,7 +439,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
445 struct sk_buff *skb) 439 struct sk_buff *skb)
446{ 440{
447 struct sock *sk, *result; 441 struct sock *sk, *result;
448 int score, badness, matches = 0, reuseport = 0; 442 int score, badness;
449 u32 hash = 0; 443 u32 hash = 0;
450 444
451 result = NULL; 445 result = NULL;
@@ -454,23 +448,16 @@ static struct sock *udp4_lib_lookup2(struct net *net,
454 score = compute_score(sk, net, saddr, sport, 448 score = compute_score(sk, net, saddr, sport,
455 daddr, hnum, dif, sdif, exact_dif); 449 daddr, hnum, dif, sdif, exact_dif);
456 if (score > badness) { 450 if (score > badness) {
457 reuseport = sk->sk_reuseport; 451 if (sk->sk_reuseport) {
458 if (reuseport) {
459 hash = udp_ehashfn(net, daddr, hnum, 452 hash = udp_ehashfn(net, daddr, hnum,
460 saddr, sport); 453 saddr, sport);
461 result = reuseport_select_sock(sk, hash, skb, 454 result = reuseport_select_sock(sk, hash, skb,
462 sizeof(struct udphdr)); 455 sizeof(struct udphdr));
463 if (result) 456 if (result)
464 return result; 457 return result;
465 matches = 1;
466 } 458 }
467 badness = score; 459 badness = score;
468 result = sk; 460 result = sk;
469 } else if (score == badness && reuseport) {
470 matches++;
471 if (reciprocal_scale(hash, matches) == 0)
472 result = sk;
473 hash = next_pseudo_random32(hash);
474 } 461 }
475 } 462 }
476 return result; 463 return result;
@@ -488,11 +475,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
488 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); 475 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
489 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; 476 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
490 bool exact_dif = udp_lib_exact_dif_match(net, skb); 477 bool exact_dif = udp_lib_exact_dif_match(net, skb);
491 int score, badness, matches = 0, reuseport = 0; 478 int score, badness;
492 u32 hash = 0; 479 u32 hash = 0;
493 480
494 if (hslot->count > 10) { 481 if (hslot->count > 10) {
495 hash2 = udp4_portaddr_hash(net, daddr, hnum); 482 hash2 = ipv4_portaddr_hash(net, daddr, hnum);
496 slot2 = hash2 & udptable->mask; 483 slot2 = hash2 & udptable->mask;
497 hslot2 = &udptable->hash2[slot2]; 484 hslot2 = &udptable->hash2[slot2];
498 if (hslot->count < hslot2->count) 485 if (hslot->count < hslot2->count)
@@ -503,7 +490,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
503 exact_dif, hslot2, skb); 490 exact_dif, hslot2, skb);
504 if (!result) { 491 if (!result) {
505 unsigned int old_slot2 = slot2; 492 unsigned int old_slot2 = slot2;
506 hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 493 hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
507 slot2 = hash2 & udptable->mask; 494 slot2 = hash2 & udptable->mask;
508 /* avoid searching the same slot again. */ 495 /* avoid searching the same slot again. */
509 if (unlikely(slot2 == old_slot2)) 496 if (unlikely(slot2 == old_slot2))
@@ -526,23 +513,16 @@ begin:
526 score = compute_score(sk, net, saddr, sport, 513 score = compute_score(sk, net, saddr, sport,
527 daddr, hnum, dif, sdif, exact_dif); 514 daddr, hnum, dif, sdif, exact_dif);
528 if (score > badness) { 515 if (score > badness) {
529 reuseport = sk->sk_reuseport; 516 if (sk->sk_reuseport) {
530 if (reuseport) {
531 hash = udp_ehashfn(net, daddr, hnum, 517 hash = udp_ehashfn(net, daddr, hnum,
532 saddr, sport); 518 saddr, sport);
533 result = reuseport_select_sock(sk, hash, skb, 519 result = reuseport_select_sock(sk, hash, skb,
534 sizeof(struct udphdr)); 520 sizeof(struct udphdr));
535 if (result) 521 if (result)
536 return result; 522 return result;
537 matches = 1;
538 } 523 }
539 result = sk; 524 result = sk;
540 badness = score; 525 badness = score;
541 } else if (score == badness && reuseport) {
542 matches++;
543 if (reciprocal_scale(hash, matches) == 0)
544 result = sk;
545 hash = next_pseudo_random32(hash);
546 } 526 }
547 } 527 }
548 return result; 528 return result;
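
With reuseport_select_sock() solely responsible for picking the socket from the group, the open-coded matches/reciprocal_scale fallback in both lookup paths is dead code and goes away. The userspace side is unchanged — a minimal worker sketch:

    #include <netinet/in.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Sketch: each worker binds its own UDP socket to the same port
     * with SO_REUSEPORT; the kernel spreads incoming datagrams across
     * the group via reuseport_select_sock().
     */
    static int make_worker_socket(uint16_t port)
    {
            struct sockaddr_in addr;
            int one = 1;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0)
                    return -1;
            setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
            memset(&addr, 0, sizeof(addr));
            addr.sin_family = AF_INET;
            addr.sin_addr.s_addr = htonl(INADDR_ANY);
            addr.sin_port = htons(port);
            if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }
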
@@ -997,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
997 if (!saddr) 977 if (!saddr)
998 saddr = inet->mc_addr; 978 saddr = inet->mc_addr;
999 connected = 0; 979 connected = 0;
1000 } else if (!ipc.oif) 980 } else if (!ipc.oif) {
1001 ipc.oif = inet->uc_index; 981 ipc.oif = inet->uc_index;
982 } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
983 /* oif is set, packet is to local broadcast and
984 * and uc_index is set. oif is most likely set
985 * by sk_bound_dev_if. If uc_index != oif check if the
986 * oif is an L3 master and uc_index is an L3 slave.
987 * If so, we want to allow the send using the uc_index.
988 */
989 if (ipc.oif != inet->uc_index &&
990 ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
991 inet->uc_index)) {
992 ipc.oif = inet->uc_index;
993 }
994 }
1002 995
1003 if (connected) 996 if (connected)
1004 rt = (struct rtable *)sk_dst_check(sk, 0); 997 rt = (struct rtable *)sk_dst_check(sk, 0);
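
The new branch covers sockets bound to a VRF: when oif came from sk_bound_dev_if (the L3 master) while IP_UNICAST_IF names an interface enslaved to it, sends to a local broadcast address are now allowed out the slave. A userspace sketch of the combination this enables (device names illustrative; note IP_UNICAST_IF takes the ifindex in network byte order):

    #include <net/if.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Sketch: bind the socket to a VRF ("vrf-blue") and direct sends
     * out a specific slave ("eth1") via IP_UNICAST_IF. With this
     * patch, local-broadcast sends work when eth1 is enslaved to
     * vrf-blue.
     */
    static int setup_vrf_bcast(int fd)
    {
            int ifindex = htonl(if_nametoindex("eth1"));
            int one = 1;

            if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                           "vrf-blue", strlen("vrf-blue") + 1))
                    return -1;
            if (setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF,
                           &ifindex, sizeof(ifindex)))
                    return -1;
            return setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one));
    }
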
@@ -1775,7 +1768,7 @@ EXPORT_SYMBOL(udp_lib_rehash);
1775 1768
1776static void udp_v4_rehash(struct sock *sk) 1769static void udp_v4_rehash(struct sock *sk)
1777{ 1770{
1778 u16 new_hash = udp4_portaddr_hash(sock_net(sk), 1771 u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
1779 inet_sk(sk)->inet_rcv_saddr, 1772 inet_sk(sk)->inet_rcv_saddr,
1780 inet_sk(sk)->inet_num); 1773 inet_sk(sk)->inet_num);
1781 udp_lib_rehash(sk, new_hash); 1774 udp_lib_rehash(sk, new_hash);
@@ -1966,9 +1959,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1966 struct sk_buff *nskb; 1959 struct sk_buff *nskb;
1967 1960
1968 if (use_hash2) { 1961 if (use_hash2) {
1969 hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & 1962 hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
1970 udptable->mask; 1963 udptable->mask;
1971 hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; 1964 hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
1972start_lookup: 1965start_lookup:
1973 hslot = &udptable->hash2[hash2]; 1966 hslot = &udptable->hash2[hash2];
1974 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); 1967 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -2200,7 +2193,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
2200 int dif, int sdif) 2193 int dif, int sdif)
2201{ 2194{
2202 unsigned short hnum = ntohs(loc_port); 2195 unsigned short hnum = ntohs(loc_port);
2203 unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); 2196 unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
2204 unsigned int slot2 = hash2 & udp_table.mask; 2197 unsigned int slot2 = hash2 & udp_table.mask;
2205 struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; 2198 struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
2206 INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); 2199 INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
@@ -2502,20 +2495,18 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
2502 * but then block when reading it. Add special case code 2495 * but then block when reading it. Add special case code
2503 * to work around these arguably broken applications. 2496 * to work around these arguably broken applications.
2504 */ 2497 */
2505unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) 2498__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
2506{ 2499{
2507 unsigned int mask = datagram_poll(file, sock, wait); 2500 __poll_t mask = datagram_poll(file, sock, wait);
2508 struct sock *sk = sock->sk; 2501 struct sock *sk = sock->sk;
2509 2502
2510 if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) 2503 if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
2511 mask |= POLLIN | POLLRDNORM; 2504 mask |= EPOLLIN | EPOLLRDNORM;
2512
2513 sock_rps_record_flow(sk);
2514 2505
2515 /* Check for false positives due to checksum errors */ 2506 /* Check for false positives due to checksum errors */
2516 if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && 2507 if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
2517 !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) 2508 !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
2518 mask &= ~(POLLIN | POLLRDNORM); 2509 mask &= ~(EPOLLIN | EPOLLRDNORM);
2519 2510
2520 return mask; 2511 return mask;
2521 2512
@@ -2736,7 +2727,6 @@ int udp4_seq_show(struct seq_file *seq, void *v)
2736} 2727}
2737 2728
2738static const struct file_operations udp_afinfo_seq_fops = { 2729static const struct file_operations udp_afinfo_seq_fops = {
2739 .owner = THIS_MODULE,
2740 .open = udp_seq_open, 2730 .open = udp_seq_open,
2741 .read = seq_read, 2731 .read = seq_read,
2742 .llseek = seq_lseek, 2732 .llseek = seq_lseek,
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 59f10fe9782e..f96614e9b9a5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -75,7 +75,6 @@ static struct inet_protosw udplite4_protosw = {
75#ifdef CONFIG_PROC_FS 75#ifdef CONFIG_PROC_FS
76 76
77static const struct file_operations udplite_afinfo_seq_fops = { 77static const struct file_operations udplite_afinfo_seq_fops = {
78 .owner = THIS_MODULE,
79 .open = udp_seq_open, 78 .open = udp_seq_open,
80 .read = seq_read, 79 .read = seq_read,
81 .llseek = seq_lseek, 80 .llseek = seq_lseek,
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 20ca486b3cad..63faeee989a9 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -62,7 +62,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
62 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 62 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
63 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); 63 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
64 64
65 top_iph->ttl = ip4_dst_hoplimit(dst->child); 65 top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
66 66
67 top_iph->saddr = x->props.saddr.a4; 67 top_iph->saddr = x->props.saddr.a4;
68 top_iph->daddr = x->id.daddr.a4; 68 top_iph->daddr = x->id.daddr.a4;
@@ -106,18 +106,15 @@ static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
106{ 106{
107 __skb_push(skb, skb->mac_len); 107 __skb_push(skb, skb->mac_len);
108 return skb_mac_gso_segment(skb, features); 108 return skb_mac_gso_segment(skb, features);
109
110} 109}
111 110
112static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) 111static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
113{ 112{
114 struct xfrm_offload *xo = xfrm_offload(skb); 113 struct xfrm_offload *xo = xfrm_offload(skb);
115 114
116 if (xo->flags & XFRM_GSO_SEGMENT) { 115 if (xo->flags & XFRM_GSO_SEGMENT)
117 skb->network_header = skb->network_header - x->props.header_len;
118 skb->transport_header = skb->network_header + 116 skb->transport_header = skb->network_header +
119 sizeof(struct iphdr); 117 sizeof(struct iphdr);
120 }
121 118
122 skb_reset_mac_len(skb); 119 skb_reset_mac_len(skb);
123 pskb_pull(skb, skb->mac_len + x->props.header_len); 120 pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f49bd7897e95..e1846b97ee69 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -186,7 +186,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
186 186
187static void addrconf_dad_start(struct inet6_ifaddr *ifp); 187static void addrconf_dad_start(struct inet6_ifaddr *ifp);
188static void addrconf_dad_work(struct work_struct *w); 188static void addrconf_dad_work(struct work_struct *w);
189static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); 189static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
190 bool send_na);
190static void addrconf_dad_run(struct inet6_dev *idev); 191static void addrconf_dad_run(struct inet6_dev *idev);
191static void addrconf_rs_timer(struct timer_list *t); 192static void addrconf_rs_timer(struct timer_list *t);
192static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); 193static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3438,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3438 } else if (event == NETDEV_CHANGE) { 3439 } else if (event == NETDEV_CHANGE) {
3439 if (!addrconf_link_ready(dev)) { 3440 if (!addrconf_link_ready(dev)) {
3440 /* device is still not ready. */ 3441 /* device is still not ready. */
3442 rt6_sync_down_dev(dev, event);
3441 break; 3443 break;
3442 } 3444 }
3443 3445
@@ -3449,6 +3451,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3449 * multicast snooping switches 3451 * multicast snooping switches
3450 */ 3452 */
3451 ipv6_mc_up(idev); 3453 ipv6_mc_up(idev);
3454 rt6_sync_up(dev, RTNH_F_LINKDOWN);
3452 break; 3455 break;
3453 } 3456 }
3454 idev->if_flags |= IF_READY; 3457 idev->if_flags |= IF_READY;
@@ -3484,6 +3487,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3484 if (run_pending) 3487 if (run_pending)
3485 addrconf_dad_run(idev); 3488 addrconf_dad_run(idev);
3486 3489
3490 /* Device has an address by now */
3491 rt6_sync_up(dev, RTNH_F_DEAD);
3492
3487 /* 3493 /*
3488 * If the MTU changed during the interface down, 3494 * If the MTU changed during the interface down,
3489 * when the interface up, the changed MTU must be 3495 * when the interface up, the changed MTU must be
@@ -3577,6 +3583,7 @@ static bool addr_is_local(const struct in6_addr *addr)
3577 3583
3578static int addrconf_ifdown(struct net_device *dev, int how) 3584static int addrconf_ifdown(struct net_device *dev, int how)
3579{ 3585{
3586 unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
3580 struct net *net = dev_net(dev); 3587 struct net *net = dev_net(dev);
3581 struct inet6_dev *idev; 3588 struct inet6_dev *idev;
3582 struct inet6_ifaddr *ifa, *tmp; 3589 struct inet6_ifaddr *ifa, *tmp;
@@ -3586,8 +3593,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3586 3593
3587 ASSERT_RTNL(); 3594 ASSERT_RTNL();
3588 3595
3589 rt6_ifdown(net, dev); 3596 rt6_disable_ip(dev, event);
3590 neigh_ifdown(&nd_tbl, dev);
3591 3597
3592 idev = __in6_dev_get(dev); 3598 idev = __in6_dev_get(dev);
3593 if (!idev) 3599 if (!idev)
@@ -3833,12 +3839,17 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
3833 idev->cnf.accept_dad < 1) || 3839 idev->cnf.accept_dad < 1) ||
3834 !(ifp->flags&IFA_F_TENTATIVE) || 3840 !(ifp->flags&IFA_F_TENTATIVE) ||
3835 ifp->flags & IFA_F_NODAD) { 3841 ifp->flags & IFA_F_NODAD) {
3842 bool send_na = false;
3843
3844 if (ifp->flags & IFA_F_TENTATIVE &&
3845 !(ifp->flags & IFA_F_OPTIMISTIC))
3846 send_na = true;
3836 bump_id = ifp->flags & IFA_F_TENTATIVE; 3847 bump_id = ifp->flags & IFA_F_TENTATIVE;
3837 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); 3848 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
3838 spin_unlock(&ifp->lock); 3849 spin_unlock(&ifp->lock);
3839 read_unlock_bh(&idev->lock); 3850 read_unlock_bh(&idev->lock);
3840 3851
3841 addrconf_dad_completed(ifp, bump_id); 3852 addrconf_dad_completed(ifp, bump_id, send_na);
3842 return; 3853 return;
3843 } 3854 }
3844 3855
@@ -3967,16 +3978,21 @@ static void addrconf_dad_work(struct work_struct *w)
3967 } 3978 }
3968 3979
3969 if (ifp->dad_probes == 0) { 3980 if (ifp->dad_probes == 0) {
3981 bool send_na = false;
3982
3970 /* 3983 /*
3971 * DAD was successful 3984 * DAD was successful
3972 */ 3985 */
3973 3986
3987 if (ifp->flags & IFA_F_TENTATIVE &&
3988 !(ifp->flags & IFA_F_OPTIMISTIC))
3989 send_na = true;
3974 bump_id = ifp->flags & IFA_F_TENTATIVE; 3990 bump_id = ifp->flags & IFA_F_TENTATIVE;
3975 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); 3991 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
3976 spin_unlock(&ifp->lock); 3992 spin_unlock(&ifp->lock);
3977 write_unlock_bh(&idev->lock); 3993 write_unlock_bh(&idev->lock);
3978 3994
3979 addrconf_dad_completed(ifp, bump_id); 3995 addrconf_dad_completed(ifp, bump_id, send_na);
3980 3996
3981 goto out; 3997 goto out;
3982 } 3998 }
@@ -4014,7 +4030,8 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
4014 return true; 4030 return true;
4015} 4031}
4016 4032
4017static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) 4033static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
4034 bool send_na)
4018{ 4035{
4019 struct net_device *dev = ifp->idev->dev; 4036 struct net_device *dev = ifp->idev->dev;
4020 struct in6_addr lladdr; 4037 struct in6_addr lladdr;
@@ -4046,6 +4063,16 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
4046 if (send_mld) 4063 if (send_mld)
4047 ipv6_mc_dad_complete(ifp->idev); 4064 ipv6_mc_dad_complete(ifp->idev);
4048 4065
4066 /* send unsolicited NA if enabled */
4067 if (send_na &&
4068 (ifp->idev->cnf.ndisc_notify ||
4069 dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
4070 ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
4071 /*router=*/ !!ifp->idev->cnf.forwarding,
4072 /*solicited=*/ false, /*override=*/ true,
4073 /*inc_opt=*/ true);
4074 }
4075
4049 if (send_rs) { 4076 if (send_rs) {
4050 /* 4077 /*
 4051 * If a host has already performed a random delay 4078
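
Note that send_na is computed while ifp->flags still holds its pre-clear value: the unsolicited neighbour advertisement is only worth sending when the address actually just completed DAD (it was tentative and not in optimistic mode) and ndisc_notify is enabled. A self-contained model of that decision, using the real IFA_F_* flag values:

#include <stdbool.h>
#include <stdio.h>

#define IFA_F_OPTIMISTIC 0x04
#define IFA_F_TENTATIVE  0x40

static bool should_send_na(unsigned int flags, bool ndisc_notify)
{
	bool completed_dad = (flags & IFA_F_TENTATIVE) &&
			     !(flags & IFA_F_OPTIMISTIC);

	return completed_dad && ndisc_notify;
}

int main(void)
{
	printf("%d\n", should_send_na(IFA_F_TENTATIVE, true));		/* 1 */
	printf("%d\n", should_send_na(IFA_F_OPTIMISTIC, true));	/* 0 */
	return 0;
}
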
@@ -4209,7 +4236,6 @@ static int if6_seq_open(struct inode *inode, struct file *file)
4209} 4236}
4210 4237
4211static const struct file_operations if6_fops = { 4238static const struct file_operations if6_fops = {
4212 .owner = THIS_MODULE,
4213 .open = if6_seq_open, 4239 .open = if6_seq_open,
4214 .read = seq_read, 4240 .read = seq_read,
4215 .llseek = seq_lseek, 4241 .llseek = seq_lseek,
@@ -4352,9 +4378,11 @@ restart:
4352 spin_lock(&ifpub->lock); 4378 spin_lock(&ifpub->lock);
4353 ifpub->regen_count = 0; 4379 ifpub->regen_count = 0;
4354 spin_unlock(&ifpub->lock); 4380 spin_unlock(&ifpub->lock);
4381 rcu_read_unlock_bh();
4355 ipv6_create_tempaddr(ifpub, ifp, true); 4382 ipv6_create_tempaddr(ifpub, ifp, true);
4356 in6_ifa_put(ifpub); 4383 in6_ifa_put(ifpub);
4357 in6_ifa_put(ifp); 4384 in6_ifa_put(ifp);
4385 rcu_read_lock_bh();
4358 goto restart; 4386 goto restart;
4359 } 4387 }
4360 } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) 4388 } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -6595,27 +6623,45 @@ int __init addrconf_init(void)
6595 6623
6596 rtnl_af_register(&inet6_ops); 6624 rtnl_af_register(&inet6_ops);
6597 6625
6598 err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, 6626 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
6599 0); 6627 NULL, inet6_dump_ifinfo, 0);
6600 if (err < 0) 6628 if (err < 0)
6601 goto errout; 6629 goto errout;
6602 6630
6603 /* Only the first call to __rtnl_register can fail */ 6631 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
6604 __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); 6632 inet6_rtm_newaddr, NULL, 0);
6605 __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); 6633 if (err < 0)
6606 __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, 6634 goto errout;
6607 inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED); 6635 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
6608 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, 6636 inet6_rtm_deladdr, NULL, 0);
6609 inet6_dump_ifmcaddr, 0); 6637 if (err < 0)
6610 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, 6638 goto errout;
6611 inet6_dump_ifacaddr, 0); 6639 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
6612 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, 6640 inet6_rtm_getaddr, inet6_dump_ifaddr,
6613 inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED); 6641 RTNL_FLAG_DOIT_UNLOCKED);
6614 6642 if (err < 0)
6615 ipv6_addr_label_rtnl_register(); 6643 goto errout;
6644 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
6645 NULL, inet6_dump_ifmcaddr, 0);
6646 if (err < 0)
6647 goto errout;
6648 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
6649 NULL, inet6_dump_ifacaddr, 0);
6650 if (err < 0)
6651 goto errout;
6652 err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
6653 inet6_netconf_get_devconf,
6654 inet6_netconf_dump_devconf,
6655 RTNL_FLAG_DOIT_UNLOCKED);
6656 if (err < 0)
6657 goto errout;
6658 err = ipv6_addr_label_rtnl_register();
6659 if (err < 0)
6660 goto errout;
6616 6661
6617 return 0; 6662 return 0;
6618errout: 6663errout:
6664 rtnl_unregister_all(PF_INET6);
6619 rtnl_af_unregister(&inet6_ops); 6665 rtnl_af_unregister(&inet6_ops);
6620 unregister_netdevice_notifier(&ipv6_dev_notf); 6666 unregister_netdevice_notifier(&ipv6_dev_notf);
6621errlo: 6667errlo:
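
The addrconf conversion above turns a chain of __rtnl_register() calls (where only the first could fail) into rtnl_register_module() calls that each propagate errors, with the errout path calling rtnl_unregister_all(PF_INET6) to tear down whatever was registered, wholesale by family. A self-contained sketch of the same register-then-unwind idiom — the handler table and reverse-order unwinding here are illustrative; the kernel's actual cleanup is the family-wide unregister shown in the diff:

#include <stdio.h>

static int ok_reg(void)  { return 0; }
static int bad_reg(void) { return -1; }
static void unreg(void)  { puts("unregistered"); }

struct handler { int (*reg)(void); void (*unreg)(void); };

static int register_all(struct handler *h, int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = h[i].reg();
		if (err < 0)
			break;
	}
	if (err < 0)
		while (--i >= 0)	/* undo everything registered so far */
			h[i].unreg();
	return err;
}

int main(void)
{
	struct handler tab[] = {
		{ ok_reg, unreg }, { ok_reg, unreg }, { bad_reg, unreg },
	};

	return register_all(tab, 3) ? 1 : 0;
}
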
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 00e1f8ee08f8..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -547,13 +547,22 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
547 return err; 547 return err;
548} 548}
549 549
550void __init ipv6_addr_label_rtnl_register(void) 550int __init ipv6_addr_label_rtnl_register(void)
551{ 551{
552 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 552 int ret;
553 NULL, RTNL_FLAG_DOIT_UNLOCKED);
554 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
555 NULL, RTNL_FLAG_DOIT_UNLOCKED);
556 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
557 ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
558}
559 553
554 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
555 ip6addrlbl_newdel,
556 NULL, RTNL_FLAG_DOIT_UNLOCKED);
557 if (ret < 0)
558 return ret;
559 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
560 ip6addrlbl_newdel,
561 NULL, RTNL_FLAG_DOIT_UNLOCKED);
562 if (ret < 0)
563 return ret;
564 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
565 ip6addrlbl_get,
566 ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
567 return ret;
568}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c9441ca45399..416917719a6f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -284,6 +284,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
284 struct net *net = sock_net(sk); 284 struct net *net = sock_net(sk);
285 __be32 v4addr = 0; 285 __be32 v4addr = 0;
286 unsigned short snum; 286 unsigned short snum;
287 bool saved_ipv6only;
287 int addr_type = 0; 288 int addr_type = 0;
288 int err = 0; 289 int err = 0;
289 290
@@ -389,19 +390,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
389 if (!(addr_type & IPV6_ADDR_MULTICAST)) 390 if (!(addr_type & IPV6_ADDR_MULTICAST))
390 np->saddr = addr->sin6_addr; 391 np->saddr = addr->sin6_addr;
391 392
393 saved_ipv6only = sk->sk_ipv6only;
394 if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
395 sk->sk_ipv6only = 1;
396
392 /* Make sure we are allowed to bind here. */ 397 /* Make sure we are allowed to bind here. */
393 if ((snum || !inet->bind_address_no_port) && 398 if ((snum || !inet->bind_address_no_port) &&
394 sk->sk_prot->get_port(sk, snum)) { 399 sk->sk_prot->get_port(sk, snum)) {
400 sk->sk_ipv6only = saved_ipv6only;
395 inet_reset_saddr(sk); 401 inet_reset_saddr(sk);
396 err = -EADDRINUSE; 402 err = -EADDRINUSE;
397 goto out; 403 goto out;
398 } 404 }
399 405
400 if (addr_type != IPV6_ADDR_ANY) { 406 if (addr_type != IPV6_ADDR_ANY)
401 sk->sk_userlocks |= SOCK_BINDADDR_LOCK; 407 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
402 if (addr_type != IPV6_ADDR_MAPPED)
403 sk->sk_ipv6only = 1;
404 }
405 if (snum) 408 if (snum)
406 sk->sk_userlocks |= SOCK_BINDPORT_LOCK; 409 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
407 inet->inet_sport = htons(inet->inet_num); 410 inet->inet_sport = htons(inet->inet_num);
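
The inet6_bind() change moves the v6-only decision ahead of the port grab: binding to a specific (non-any, non-mapped) IPv6 address now sets sk_ipv6only *before* get_port(), so the bind-conflict logic already sees the final value, and the saved flag is restored if the port grab fails. A tiny self-contained model of that save/flip/roll-back pattern (fake_sock and port_in_use are stand-ins for struct sock and get_port()):

#include <stdbool.h>
#include <stdio.h>

struct fake_sock { bool ipv6only; };

static int port_in_use(const struct fake_sock *sk)
{
	(void)sk;
	return 0;	/* pretend the port is free */
}

static int bind_addr(struct fake_sock *sk, bool addr_is_specific_v6)
{
	bool saved = sk->ipv6only;

	if (addr_is_specific_v6)
		sk->ipv6only = true;	/* conflict check must see the final value */

	if (port_in_use(sk)) {
		sk->ipv6only = saved;	/* roll back on failure */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct fake_sock sk = { false };

	printf("bind: %d, ipv6only: %d\n", bind_addr(&sk, true), sk.ipv6only);
	return 0;
}
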
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0bbab8a4b5d8..8e085cc05aeb 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -533,7 +533,6 @@ static int ac6_seq_open(struct inode *inode, struct file *file)
533} 533}
534 534
535static const struct file_operations ac6_seq_fops = { 535static const struct file_operations ac6_seq_fops = {
536 .owner = THIS_MODULE,
537 .open = ac6_seq_open, 536 .open = ac6_seq_open,
538 .read = seq_read, 537 .read = seq_read,
539 .llseek = seq_lseek, 538 .llseek = seq_lseek,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a1f918713006..fbf08ce3f5ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -221,8 +221,7 @@ ipv4_connected:
221 if (__ipv6_addr_needs_scope_id(addr_type)) { 221 if (__ipv6_addr_needs_scope_id(addr_type)) {
222 if (addr_len >= sizeof(struct sockaddr_in6) && 222 if (addr_len >= sizeof(struct sockaddr_in6) &&
223 usin->sin6_scope_id) { 223 usin->sin6_scope_id) {
224 if (sk->sk_bound_dev_if && 224 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) {
225 sk->sk_bound_dev_if != usin->sin6_scope_id) {
226 err = -EINVAL; 225 err = -EINVAL;
227 goto out; 226 goto out;
228 } 227 }
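
The datagram.c hunk replaces the open-coded bound-device comparison with sk_dev_equal_l3scope(), which also accepts a scope id naming a device enslaved to the socket's bound L3 master (VRF) instead of insisting on an exact ifindex match. A simplified self-contained model of the check — 'master' stands in for the L3 master ifindex of the scope-id device, which the real helper looks up itself:

#include <stdbool.h>
#include <stdio.h>

static bool dev_equal_l3scope(int bound_dev_if, int scope_id, int master)
{
	if (!bound_dev_if || bound_dev_if == scope_id)
		return true;	/* unbound socket, or exact device match */
	return bound_dev_if == master;	/* scope device enslaved to our VRF */
}

int main(void)
{
	/* socket bound to VRF ifindex 10; scope_id 4 is enslaved to it */
	printf("%d\n", dev_equal_l3scope(10, 4, 10));	/* 1 */
	printf("%d\n", dev_equal_l3scope(10, 4, 0));	/* 0 */
	return 0;
}
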
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1a7f00cd4803..97513f35bcc5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -141,14 +141,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
141static void esp_output_done(struct crypto_async_request *base, int err) 141static void esp_output_done(struct crypto_async_request *base, int err)
142{ 142{
143 struct sk_buff *skb = base->data; 143 struct sk_buff *skb = base->data;
144 struct xfrm_offload *xo = xfrm_offload(skb);
144 void *tmp; 145 void *tmp;
145 struct dst_entry *dst = skb_dst(skb); 146 struct xfrm_state *x;
146 struct xfrm_state *x = dst->xfrm; 147
148 if (xo && (xo->flags & XFRM_DEV_RESUME))
149 x = skb->sp->xvec[skb->sp->len - 1];
150 else
151 x = skb_dst(skb)->xfrm;
147 152
148 tmp = ESP_SKB_CB(skb)->tmp; 153 tmp = ESP_SKB_CB(skb)->tmp;
149 esp_ssg_unref(x, tmp); 154 esp_ssg_unref(x, tmp);
150 kfree(tmp); 155 kfree(tmp);
151 xfrm_output_resume(skb, err); 156
157 if (xo && (xo->flags & XFRM_DEV_RESUME)) {
158 if (err) {
159 XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
160 kfree_skb(skb);
161 return;
162 }
163
164 skb_push(skb, skb->data - skb_mac_header(skb));
165 secpath_reset(skb);
166 xfrm_dev_resume(skb);
167 } else {
168 xfrm_output_resume(skb, err);
169 }
152} 170}
153 171
154/* Move ESP header back into place. */ 172/* Move ESP header back into place. */
@@ -734,17 +752,13 @@ static int esp_init_aead(struct xfrm_state *x)
734 char aead_name[CRYPTO_MAX_ALG_NAME]; 752 char aead_name[CRYPTO_MAX_ALG_NAME];
735 struct crypto_aead *aead; 753 struct crypto_aead *aead;
736 int err; 754 int err;
737 u32 mask = 0;
738 755
739 err = -ENAMETOOLONG; 756 err = -ENAMETOOLONG;
740 if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", 757 if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
741 x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) 758 x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
742 goto error; 759 goto error;
743 760
744 if (x->xso.offload_handle) 761 aead = crypto_alloc_aead(aead_name, 0, 0);
745 mask |= CRYPTO_ALG_ASYNC;
746
747 aead = crypto_alloc_aead(aead_name, 0, mask);
748 err = PTR_ERR(aead); 762 err = PTR_ERR(aead);
749 if (IS_ERR(aead)) 763 if (IS_ERR(aead))
750 goto error; 764 goto error;
@@ -774,7 +788,6 @@ static int esp_init_authenc(struct xfrm_state *x)
774 char authenc_name[CRYPTO_MAX_ALG_NAME]; 788 char authenc_name[CRYPTO_MAX_ALG_NAME];
775 unsigned int keylen; 789 unsigned int keylen;
776 int err; 790 int err;
777 u32 mask = 0;
778 791
779 err = -EINVAL; 792 err = -EINVAL;
780 if (!x->ealg) 793 if (!x->ealg)
@@ -800,10 +813,7 @@ static int esp_init_authenc(struct xfrm_state *x)
800 goto error; 813 goto error;
801 } 814 }
802 815
803 if (x->xso.offload_handle) 816 aead = crypto_alloc_aead(authenc_name, 0, 0);
804 mask |= CRYPTO_ALG_ASYNC;
805
806 aead = crypto_alloc_aead(authenc_name, 0, mask);
807 err = PTR_ERR(aead); 817 err = PTR_ERR(aead);
808 if (IS_ERR(aead)) 818 if (IS_ERR(aead))
809 goto error; 819 goto error;
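
Two related things happen in esp6.c: esp_output_done() stops assuming skb_dst()->xfrm is valid — for a device-offload resume (XFRM_DEV_RESUME) it takes the state from the secpath, drops and counts the packet on error, and otherwise hands it back through xfrm_dev_resume() — and the CRYPTO_ALG_ASYNC mask is dropped from crypto_alloc_aead(), since offloaded states no longer need to be pinned to synchronous algorithms now that the resume path copes with async completion. A self-contained model of that dual-path completion dispatch (pkt and the resume stubs are stand-ins for sk_buff and the xfrm resume calls):

#include <stdio.h>

#define DEV_RESUME 0x1	/* stands in for XFRM_DEV_RESUME */

struct pkt { unsigned int flags; };

static void resume_via_driver(struct pkt *p)   { (void)p; puts("driver"); }
static void resume_via_software(struct pkt *p) { (void)p; puts("software"); }

static void output_done(struct pkt *p, int err)
{
	if (p->flags & DEV_RESUME) {
		if (err) {		/* offload failed: count and drop */
			puts("drop");
			return;
		}
		resume_via_driver(p);
	} else {
		resume_via_software(p);	/* classic xfrm_output_resume() path */
	}
}

int main(void)
{
	struct pkt offloaded = { DEV_RESUME }, plain = { 0 };

	output_done(&offloaded, 0);
	output_done(&plain, 0);
	return 0;
}
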
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index f52c314d4c97..3fd1ec775dc2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -136,78 +136,39 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
136static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, 136static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
137 netdev_features_t features) 137 netdev_features_t features)
138{ 138{
139 __u32 seq;
140 int err = 0;
141 struct sk_buff *skb2;
142 struct xfrm_state *x; 139 struct xfrm_state *x;
143 struct ip_esp_hdr *esph; 140 struct ip_esp_hdr *esph;
144 struct crypto_aead *aead; 141 struct crypto_aead *aead;
145 struct sk_buff *segs = ERR_PTR(-EINVAL);
146 netdev_features_t esp_features = features; 142 netdev_features_t esp_features = features;
147 struct xfrm_offload *xo = xfrm_offload(skb); 143 struct xfrm_offload *xo = xfrm_offload(skb);
148 144
149 if (!xo) 145 if (!xo)
150 goto out; 146 return ERR_PTR(-EINVAL);
151 147
152 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) 148 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
153 goto out; 149 return ERR_PTR(-EINVAL);
154
155 seq = xo->seq.low;
156 150
157 x = skb->sp->xvec[skb->sp->len - 1]; 151 x = skb->sp->xvec[skb->sp->len - 1];
158 aead = x->data; 152 aead = x->data;
159 esph = ip_esp_hdr(skb); 153 esph = ip_esp_hdr(skb);
160 154
161 if (esph->spi != x->id.spi) 155 if (esph->spi != x->id.spi)
162 goto out; 156 return ERR_PTR(-EINVAL);
163 157
164 if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) 158 if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
165 goto out; 159 return ERR_PTR(-EINVAL);
166 160
167 __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); 161 __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
168 162
169 skb->encap_hdr_csum = 1; 163 skb->encap_hdr_csum = 1;
170 164
171 if (!(features & NETIF_F_HW_ESP)) 165 if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
166 (x->xso.dev != skb->dev))
172 esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); 167 esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
173 168
174 segs = x->outer_mode->gso_segment(x, skb, esp_features); 169 xo->flags |= XFRM_GSO_SEGMENT;
175 if (IS_ERR_OR_NULL(segs))
176 goto out;
177
178 __skb_pull(skb, skb->data - skb_mac_header(skb));
179
180 skb2 = segs;
181 do {
182 struct sk_buff *nskb = skb2->next;
183
184 xo = xfrm_offload(skb2);
185 xo->flags |= XFRM_GSO_SEGMENT;
186 xo->seq.low = seq;
187 xo->seq.hi = xfrm_replay_seqhi(x, seq);
188
189 if(!(features & NETIF_F_HW_ESP))
190 xo->flags |= CRYPTO_FALLBACK;
191
192 x->outer_mode->xmit(x, skb2);
193
194 err = x->type_offload->xmit(x, skb2, esp_features);
195 if (err) {
196 kfree_skb_list(segs);
197 return ERR_PTR(err);
198 }
199 170
200 if (!skb_is_gso(skb2)) 171 return x->outer_mode->gso_segment(x, skb, esp_features);
201 seq++;
202 else
203 seq += skb_shinfo(skb2)->gso_segs;
204
205 skb_push(skb2, skb2->mac_len);
206 skb2 = nskb;
207 } while (skb2);
208
209out:
210 return segs;
211} 172}
212 173
213static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) 174static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -226,6 +187,7 @@ static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
226 187
227static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) 188static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
228{ 189{
190 int len;
229 int err; 191 int err;
230 int alen; 192 int alen;
231 int blksize; 193 int blksize;
@@ -234,6 +196,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
234 struct crypto_aead *aead; 196 struct crypto_aead *aead;
235 struct esp_info esp; 197 struct esp_info esp;
236 bool hw_offload = true; 198 bool hw_offload = true;
199 __u32 seq;
237 200
238 esp.inplace = true; 201 esp.inplace = true;
239 202
@@ -269,28 +232,33 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
269 return esp.nfrags; 232 return esp.nfrags;
270 } 233 }
271 234
235 seq = xo->seq.low;
236
272 esph = ip_esp_hdr(skb); 237 esph = ip_esp_hdr(skb);
273 esph->spi = x->id.spi; 238 esph->spi = x->id.spi;
274 239
275 skb_push(skb, -skb_network_offset(skb)); 240 skb_push(skb, -skb_network_offset(skb));
276 241
277 if (xo->flags & XFRM_GSO_SEGMENT) { 242 if (xo->flags & XFRM_GSO_SEGMENT) {
278 esph->seq_no = htonl(xo->seq.low); 243 esph->seq_no = htonl(seq);
279 } else {
280 int len;
281
282 len = skb->len - sizeof(struct ipv6hdr);
283 if (len > IPV6_MAXPLEN)
284 len = 0;
285 244
286 ipv6_hdr(skb)->payload_len = htons(len); 245 if (!skb_is_gso(skb))
246 xo->seq.low++;
247 else
248 xo->seq.low += skb_shinfo(skb)->gso_segs;
287 } 249 }
288 250
251 esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
252
253 len = skb->len - sizeof(struct ipv6hdr);
254 if (len > IPV6_MAXPLEN)
255 len = 0;
256
257 ipv6_hdr(skb)->payload_len = htons(len);
258
289 if (hw_offload) 259 if (hw_offload)
290 return 0; 260 return 0;
291 261
292 esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
293
294 err = esp6_output_tail(x, skb, &esp); 262 err = esp6_output_tail(x, skb, &esp);
295 if (err) 263 if (err)
296 return err; 264 return err;
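
With the per-segment loop gone from esp6_gso_segment(), the sequence bookkeeping lands in esp6_xmit(): for a GSO skb the low 32 bits of the ESP sequence advance by gso_segs rather than one, and the 64-bit ESN value is composed from the low/high halves before use. A self-contained sketch of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

/* advance the low 32 bits of the ESP sequence for one transmitted skb */
static uint32_t next_esp_seq(uint32_t low, int is_gso, uint32_t gso_segs)
{
	return low + (is_gso ? gso_segs : 1);
}

/* compose the 64-bit ESN value the way esp6_xmit() does */
static uint64_t esp_seqno(uint32_t low, uint32_t hi)
{
	return (uint64_t)low | ((uint64_t)hi << 32);
}

int main(void)
{
	uint32_t low = 100;

	low = next_esp_seq(low, 1, 4);	/* a GSO skb covering 4 segments */
	printf("low=%u seqno=%llu\n", low,
	       (unsigned long long)esp_seqno(low, 0));
	return 0;
}
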
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 6eb5e68f112a..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
512 struct ila_map *ila; 512 struct ila_map *ila;
513 int ret; 513 int ret;
514 514
515 ret = rhashtable_walk_start(rhiter); 515 rhashtable_walk_start(rhiter);
516 if (ret && ret != -EAGAIN)
517 goto done;
518 516
519 for (;;) { 517 for (;;) {
520 ila = rhashtable_walk_next(rhiter); 518 ila = rhashtable_walk_next(rhiter);
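
rhashtable_walk_start() no longer returns an error in this kernel generation — it became void, with a separate rhashtable_walk_start_check() for callers that still want a status — so the -EAGAIN handling moves entirely to rhashtable_walk_next(), which reports a concurrent resize that way. A fragment sketch of the resulting canonical loop (kernel context, iter and obj declared elsewhere; not a standalone program):

	rhashtable_walk_start(&iter);
	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* table resized under us; entries may repeat */
			break;			/* real error */
		}
		/* ... act on obj ... */
	}
	rhashtable_walk_stop(&iter);
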
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b01858f5deb1..2febe26de6a1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
125} 125}
126 126
127/* called with rcu_read_lock() */ 127/* called with rcu_read_lock() */
128static struct sock *inet6_lhash2_lookup(struct net *net,
129 struct inet_listen_hashbucket *ilb2,
130 struct sk_buff *skb, int doff,
131 const struct in6_addr *saddr,
132 const __be16 sport, const struct in6_addr *daddr,
133 const unsigned short hnum, const int dif, const int sdif)
134{
135 bool exact_dif = inet6_exact_dif_match(net, skb);
136 struct inet_connection_sock *icsk;
137 struct sock *sk, *result = NULL;
138 int score, hiscore = 0;
139 u32 phash = 0;
140
141 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
142 sk = (struct sock *)icsk;
143 score = compute_score(sk, net, hnum, daddr, dif, sdif,
144 exact_dif);
145 if (score > hiscore) {
146 if (sk->sk_reuseport) {
147 phash = inet6_ehashfn(net, daddr, hnum,
148 saddr, sport);
149 result = reuseport_select_sock(sk, phash,
150 skb, doff);
151 if (result)
152 return result;
153 }
154 result = sk;
155 hiscore = score;
156 }
157 }
158
159 return result;
160}
161
128struct sock *inet6_lookup_listener(struct net *net, 162struct sock *inet6_lookup_listener(struct net *net,
129 struct inet_hashinfo *hashinfo, 163 struct inet_hashinfo *hashinfo,
130 struct sk_buff *skb, int doff, 164 struct sk_buff *skb, int doff,
@@ -134,31 +168,56 @@ struct sock *inet6_lookup_listener(struct net *net,
134{ 168{
135 unsigned int hash = inet_lhashfn(net, hnum); 169 unsigned int hash = inet_lhashfn(net, hnum);
136 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 170 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
137 int score, hiscore = 0, matches = 0, reuseport = 0;
138 bool exact_dif = inet6_exact_dif_match(net, skb); 171 bool exact_dif = inet6_exact_dif_match(net, skb);
172 struct inet_listen_hashbucket *ilb2;
139 struct sock *sk, *result = NULL; 173 struct sock *sk, *result = NULL;
174 int score, hiscore = 0;
175 unsigned int hash2;
140 u32 phash = 0; 176 u32 phash = 0;
141 177
178 if (ilb->count <= 10 || !hashinfo->lhash2)
179 goto port_lookup;
180
181 /* Too many sk in the ilb bucket (which is hashed by port alone).
182 * Try lhash2 (which is hashed by port and addr) instead.
183 */
184
185 hash2 = ipv6_portaddr_hash(net, daddr, hnum);
186 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
187 if (ilb2->count > ilb->count)
188 goto port_lookup;
189
190 result = inet6_lhash2_lookup(net, ilb2, skb, doff,
191 saddr, sport, daddr, hnum,
192 dif, sdif);
193 if (result)
194 return result;
195
196 /* Lookup lhash2 with in6addr_any */
197
198 hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
199 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
200 if (ilb2->count > ilb->count)
201 goto port_lookup;
202
203 return inet6_lhash2_lookup(net, ilb2, skb, doff,
204 saddr, sport, daddr, hnum,
205 dif, sdif);
206
207port_lookup:
142 sk_for_each(sk, &ilb->head) { 208 sk_for_each(sk, &ilb->head) {
143 score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); 209 score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
144 if (score > hiscore) { 210 if (score > hiscore) {
145 reuseport = sk->sk_reuseport; 211 if (sk->sk_reuseport) {
146 if (reuseport) {
147 phash = inet6_ehashfn(net, daddr, hnum, 212 phash = inet6_ehashfn(net, daddr, hnum,
148 saddr, sport); 213 saddr, sport);
149 result = reuseport_select_sock(sk, phash, 214 result = reuseport_select_sock(sk, phash,
150 skb, doff); 215 skb, doff);
151 if (result) 216 if (result)
152 return result; 217 return result;
153 matches = 1;
154 } 218 }
155 result = sk; 219 result = sk;
156 hiscore = score; 220 hiscore = score;
157 } else if (score == hiscore && reuseport) {
158 matches++;
159 if (reciprocal_scale(phash, matches) == 0)
160 result = sk;
161 phash = next_pseudo_random32(phash);
162 } 221 }
163 } 222 }
164 return result; 223 return result;
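
The listener lookup above becomes two-level: when the port-only chain is short (<= 10 sockets) or lhash2 is absent, the old linear scan runs; otherwise the (port, addr) bucket is scanned instead, provided it is no longer than the port-only chain, probing the specific destination address first and in6addr_any second. A self-contained model of that bucket-selection heuristic:

#include <stdio.h>

struct bucket { int count; };

/* mirrors the two "goto port_lookup" guards in the diff above */
static int use_lhash2(const struct bucket *ilb, const struct bucket *ilb2,
		      int have_lhash2)
{
	if (ilb->count <= 10 || !have_lhash2)
		return 0;	/* short chain: the linear scan is cheap */
	return ilb2->count <= ilb->count;
}

int main(void)
{
	struct bucket port_only = { 50 }, port_and_addr = { 3 };

	printf("use lhash2: %d\n",
	       use_lhash2(&port_only, &port_and_addr, 1));	/* 1 */
	return 0;
}
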
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 217683d40f12..92b8d8c75eed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -107,16 +107,13 @@ enum {
107 107
108void fib6_update_sernum(struct rt6_info *rt) 108void fib6_update_sernum(struct rt6_info *rt)
109{ 109{
110 struct fib6_table *table = rt->rt6i_table;
111 struct net *net = dev_net(rt->dst.dev); 110 struct net *net = dev_net(rt->dst.dev);
112 struct fib6_node *fn; 111 struct fib6_node *fn;
113 112
114 spin_lock_bh(&table->tb6_lock);
115 fn = rcu_dereference_protected(rt->rt6i_node, 113 fn = rcu_dereference_protected(rt->rt6i_node,
116 lockdep_is_held(&table->tb6_lock)); 114 lockdep_is_held(&rt->rt6i_table->tb6_lock));
117 if (fn) 115 if (fn)
118 fn->fn_sernum = fib6_new_sernum(net); 116 fn->fn_sernum = fib6_new_sernum(net);
119 spin_unlock_bh(&table->tb6_lock);
120} 117}
121 118
122/* 119/*
@@ -804,12 +801,6 @@ insert_above:
804 return ln; 801 return ln;
805} 802}
806 803
807static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
808{
809 return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
810 RTF_GATEWAY;
811}
812
813static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) 804static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
814{ 805{
815 int i; 806 int i;
@@ -898,7 +889,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
898 ins = &fn->leaf; 889 ins = &fn->leaf;
899 890
900 for (iter = leaf; iter; 891 for (iter = leaf; iter;
901 iter = rcu_dereference_protected(iter->dst.rt6_next, 892 iter = rcu_dereference_protected(iter->rt6_next,
902 lockdep_is_held(&rt->rt6i_table->tb6_lock))) { 893 lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
903 /* 894 /*
904 * Search for duplicates 895 * Search for duplicates
@@ -955,7 +946,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
955 break; 946 break;
956 947
957next_iter: 948next_iter:
958 ins = &iter->dst.rt6_next; 949 ins = &iter->rt6_next;
959 } 950 }
960 951
961 if (fallback_ins && !found) { 952 if (fallback_ins && !found) {
@@ -984,7 +975,7 @@ next_iter:
984 &sibling->rt6i_siblings); 975 &sibling->rt6i_siblings);
985 break; 976 break;
986 } 977 }
987 sibling = rcu_dereference_protected(sibling->dst.rt6_next, 978 sibling = rcu_dereference_protected(sibling->rt6_next,
988 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 979 lockdep_is_held(&rt->rt6i_table->tb6_lock));
989 } 980 }
990 /* For each sibling in the list, increment the counter of 981 /* For each sibling in the list, increment the counter of
@@ -999,6 +990,7 @@ next_iter:
999 rt6i_nsiblings++; 990 rt6i_nsiblings++;
1000 } 991 }
1001 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); 992 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
993 rt6_multipath_rebalance(temp_sibling);
1002 } 994 }
1003 995
1004 /* 996 /*
@@ -1014,7 +1006,7 @@ add:
1014 if (err) 1006 if (err)
1015 return err; 1007 return err;
1016 1008
1017 rcu_assign_pointer(rt->dst.rt6_next, iter); 1009 rcu_assign_pointer(rt->rt6_next, iter);
1018 atomic_inc(&rt->rt6i_ref); 1010 atomic_inc(&rt->rt6i_ref);
1019 rcu_assign_pointer(rt->rt6i_node, fn); 1011 rcu_assign_pointer(rt->rt6i_node, fn);
1020 rcu_assign_pointer(*ins, rt); 1012 rcu_assign_pointer(*ins, rt);
@@ -1045,7 +1037,7 @@ add:
1045 1037
1046 atomic_inc(&rt->rt6i_ref); 1038 atomic_inc(&rt->rt6i_ref);
1047 rcu_assign_pointer(rt->rt6i_node, fn); 1039 rcu_assign_pointer(rt->rt6i_node, fn);
1048 rt->dst.rt6_next = iter->dst.rt6_next; 1040 rt->rt6_next = iter->rt6_next;
1049 rcu_assign_pointer(*ins, rt); 1041 rcu_assign_pointer(*ins, rt);
1050 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, 1042 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
1051 rt, extack); 1043 rt, extack);
@@ -1064,14 +1056,14 @@ add:
1064 1056
1065 if (nsiblings) { 1057 if (nsiblings) {
1066 /* Replacing an ECMP route, remove all siblings */ 1058 /* Replacing an ECMP route, remove all siblings */
1067 ins = &rt->dst.rt6_next; 1059 ins = &rt->rt6_next;
1068 iter = rcu_dereference_protected(*ins, 1060 iter = rcu_dereference_protected(*ins,
1069 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 1061 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1070 while (iter) { 1062 while (iter) {
1071 if (iter->rt6i_metric > rt->rt6i_metric) 1063 if (iter->rt6i_metric > rt->rt6i_metric)
1072 break; 1064 break;
1073 if (rt6_qualify_for_ecmp(iter)) { 1065 if (rt6_qualify_for_ecmp(iter)) {
1074 *ins = iter->dst.rt6_next; 1066 *ins = iter->rt6_next;
1075 iter->rt6i_node = NULL; 1067 iter->rt6i_node = NULL;
1076 fib6_purge_rt(iter, fn, info->nl_net); 1068 fib6_purge_rt(iter, fn, info->nl_net);
1077 if (rcu_access_pointer(fn->rr_ptr) == iter) 1069 if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1080,7 +1072,7 @@ add:
1080 nsiblings--; 1072 nsiblings--;
1081 info->nl_net->ipv6.rt6_stats->fib_rt_entries--; 1073 info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1082 } else { 1074 } else {
1083 ins = &iter->dst.rt6_next; 1075 ins = &iter->rt6_next;
1084 } 1076 }
1085 iter = rcu_dereference_protected(*ins, 1077 iter = rcu_dereference_protected(*ins,
1086 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 1078 lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1107,8 +1099,8 @@ void fib6_force_start_gc(struct net *net)
1107 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 1099 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1108} 1100}
1109 1101
1110static void fib6_update_sernum_upto_root(struct rt6_info *rt, 1102static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
1111 int sernum) 1103 int sernum)
1112{ 1104{
1113 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, 1105 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
1114 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 1106 lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1122,6 +1114,11 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
1122 } 1114 }
1123} 1115}
1124 1116
1117void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
1118{
1119 __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
1120}
1121
1125/* 1122/*
1126 * Add routing information to the routing tree. 1123 * Add routing information to the routing tree.
1127 * <destination addr>/<source addr> 1124 * <destination addr>/<source addr>
@@ -1241,7 +1238,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1241 1238
1242 err = fib6_add_rt2node(fn, rt, info, mxc, extack); 1239 err = fib6_add_rt2node(fn, rt, info, mxc, extack);
1243 if (!err) { 1240 if (!err) {
1244 fib6_update_sernum_upto_root(rt, sernum); 1241 __fib6_update_sernum_upto_root(rt, sernum);
1245 fib6_start_gc(info->nl_net, rt); 1242 fib6_start_gc(info->nl_net, rt);
1246 } 1243 }
1247 1244
@@ -1670,7 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1670 WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); 1667 WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
1671 1668
1672 /* Unlink it */ 1669 /* Unlink it */
1673 *rtp = rt->dst.rt6_next; 1670 *rtp = rt->rt6_next;
1674 rt->rt6i_node = NULL; 1671 rt->rt6i_node = NULL;
1675 net->ipv6.rt6_stats->fib_rt_entries--; 1672 net->ipv6.rt6_stats->fib_rt_entries--;
1676 net->ipv6.rt6_stats->fib_discarded_routes++; 1673 net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1691,6 +1688,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1691 sibling->rt6i_nsiblings--; 1688 sibling->rt6i_nsiblings--;
1692 rt->rt6i_nsiblings = 0; 1689 rt->rt6i_nsiblings = 0;
1693 list_del_init(&rt->rt6i_siblings); 1690 list_del_init(&rt->rt6i_siblings);
1691 rt6_multipath_rebalance(next_sibling);
1694 } 1692 }
1695 1693
1696 /* Adjust walkers */ 1694 /* Adjust walkers */
@@ -1698,7 +1696,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1698 FOR_WALKERS(net, w) { 1696 FOR_WALKERS(net, w) {
1699 if (w->state == FWS_C && w->leaf == rt) { 1697 if (w->state == FWS_C && w->leaf == rt) {
1700 RT6_TRACE("walker %p adjusted by delroute\n", w); 1698 RT6_TRACE("walker %p adjusted by delroute\n", w);
1701 w->leaf = rcu_dereference_protected(rt->dst.rt6_next, 1699 w->leaf = rcu_dereference_protected(rt->rt6_next,
1702 lockdep_is_held(&table->tb6_lock)); 1700 lockdep_is_held(&table->tb6_lock));
1703 if (!w->leaf) 1701 if (!w->leaf)
1704 w->state = FWS_U; 1702 w->state = FWS_U;
@@ -1762,7 +1760,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1762 fib6_del_route(table, fn, rtp, info); 1760 fib6_del_route(table, fn, rtp, info);
1763 return 0; 1761 return 0;
1764 } 1762 }
1765 rtp_next = &cur->dst.rt6_next; 1763 rtp_next = &cur->rt6_next;
1766 } 1764 }
1767 return -ENOENT; 1765 return -ENOENT;
1768} 1766}
@@ -1918,7 +1916,7 @@ static int fib6_clean_node(struct fib6_walker *w)
1918 1916
1919 for_each_fib6_walker_rt(w) { 1917 for_each_fib6_walker_rt(w) {
1920 res = c->func(rt, c->arg); 1918 res = c->func(rt, c->arg);
1921 if (res < 0) { 1919 if (res == -1) {
1922 w->leaf = rt; 1920 w->leaf = rt;
1923 res = fib6_del(rt, &info); 1921 res = fib6_del(rt, &info);
1924 if (res) { 1922 if (res) {
@@ -1931,6 +1929,12 @@ static int fib6_clean_node(struct fib6_walker *w)
1931 continue; 1929 continue;
1932 } 1930 }
1933 return 0; 1931 return 0;
1932 } else if (res == -2) {
1933 if (WARN_ON(!rt->rt6i_nsiblings))
1934 continue;
1935 rt = list_last_entry(&rt->rt6i_siblings,
1936 struct rt6_info, rt6i_siblings);
1937 continue;
1934 } 1938 }
1935 WARN_ON(res != 0); 1939 WARN_ON(res != 0);
1936 } 1940 }
@@ -1942,7 +1946,8 @@ static int fib6_clean_node(struct fib6_walker *w)
1942 * Convenient frontend to tree walker. 1946 * Convenient frontend to tree walker.
1943 * 1947 *
1944 * func is called on each route. 1948 * func is called on each route.
1945 * It may return -1 -> delete this route. 1949 * It may return -2 -> skip multipath route.
1950 * -1 -> delete this route.
1946 * 0 -> continue walking 1951 * 0 -> continue walking
1947 */ 1952 */
1948 1953
@@ -2134,7 +2139,6 @@ static void fib6_net_exit(struct net *net)
2134{ 2139{
2135 unsigned int i; 2140 unsigned int i;
2136 2141
2137 rt6_ifdown(net, NULL);
2138 del_timer_sync(&net->ipv6.ip6_fib_timer); 2142 del_timer_sync(&net->ipv6.ip6_fib_timer);
2139 2143
2140 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { 2144 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
@@ -2173,8 +2177,8 @@ int __init fib6_init(void)
2173 if (ret) 2177 if (ret)
2174 goto out_kmem_cache_create; 2178 goto out_kmem_cache_create;
2175 2179
2176 ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, 2180 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
2177 0); 2181 inet6_dump_fib, 0);
2178 if (ret) 2182 if (ret)
2179 goto out_unregister_subsys; 2183 goto out_unregister_subsys;
2180 2184
@@ -2239,7 +2243,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
2239 2243
2240 do { 2244 do {
2241 iter->w.leaf = rcu_dereference_protected( 2245 iter->w.leaf = rcu_dereference_protected(
2242 iter->w.leaf->dst.rt6_next, 2246 iter->w.leaf->rt6_next,
2243 lockdep_is_held(&iter->tbl->tb6_lock)); 2247 lockdep_is_held(&iter->tbl->tb6_lock));
2244 iter->skip--; 2248 iter->skip--;
2245 if (!iter->skip && iter->w.leaf) 2249 if (!iter->skip && iter->w.leaf)
@@ -2305,7 +2309,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2305 if (!v) 2309 if (!v)
2306 goto iter_table; 2310 goto iter_table;
2307 2311
2308 n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next); 2312 n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
2309 if (n) { 2313 if (n) {
2310 ++*pos; 2314 ++*pos;
2311 return n; 2315 return n;
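
Besides moving rt6_next out of dst_entry into rt6_info, ip6_fib.c extends the clean-walker contract: the callback may now return -2 to skip an entire multipath route in one step (the walker jumps to the last sibling), in addition to -1 for delete and 0 for continue. A self-contained userspace model of the skip semantics — route, clean_cb and FIB6_SKIP_MULTIPATH are illustrative names, not kernel symbols:

#include <stdio.h>

#define FIB6_SKIP_MULTIPATH (-2)

struct route { int id; int nsiblings; };	/* nsiblings > 0: ECMP head */

static int clean_cb(const struct route *rt)
{
	return rt->nsiblings ? FIB6_SKIP_MULTIPATH : 0;
}

int main(void)
{
	struct route tbl[] = { {1, 0}, {2, 2}, {3, 0}, {4, 0}, {5, 0} };
	int i = 0, n = (int)(sizeof(tbl) / sizeof(tbl[0]));

	while (i < n) {
		if (clean_cb(&tbl[i]) == FIB6_SKIP_MULTIPATH) {
			i += tbl[i].nsiblings + 1;	/* jump past the ECMP group */
			continue;
		}
		printf("visited route %d\n", tbl[i].id);
		i++;
	}
	return 0;
}
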
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7f59c8fabeeb..3dab664ff503 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -836,7 +836,6 @@ static int ip6fl_seq_release(struct inode *inode, struct file *file)
836} 836}
837 837
838static const struct file_operations ip6fl_seq_fops = { 838static const struct file_operations ip6fl_seq_fops = {
839 .owner = THIS_MODULE,
840 .open = ip6fl_seq_open, 839 .open = ip6fl_seq_open,
841 .read = seq_read, 840 .read = seq_read,
842 .llseek = seq_lseek, 841 .llseek = seq_lseek,
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 873549228ccb..3c353125546d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,8 @@
55#include <net/ip6_route.h> 55#include <net/ip6_route.h>
56#include <net/ip6_tunnel.h> 56#include <net/ip6_tunnel.h>
57#include <net/gre.h> 57#include <net/gre.h>
58#include <net/erspan.h>
59#include <net/dst_metadata.h>
58 60
59 61
60static bool log_ecn_error = true; 62static bool log_ecn_error = true;
@@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly;
68struct ip6gre_net { 70struct ip6gre_net {
69 struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; 71 struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
70 72
73 struct ip6_tnl __rcu *collect_md_tun;
71 struct net_device *fb_tunnel_dev; 74 struct net_device *fb_tunnel_dev;
72}; 75};
73 76
74static struct rtnl_link_ops ip6gre_link_ops __read_mostly; 77static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
75static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; 78static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
79static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
76static int ip6gre_tunnel_init(struct net_device *dev); 80static int ip6gre_tunnel_init(struct net_device *dev);
77static void ip6gre_tunnel_setup(struct net_device *dev); 81static void ip6gre_tunnel_setup(struct net_device *dev);
78static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); 82static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -121,7 +125,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
121 unsigned int h1 = HASH_KEY(key); 125 unsigned int h1 = HASH_KEY(key);
122 struct ip6_tnl *t, *cand = NULL; 126 struct ip6_tnl *t, *cand = NULL;
123 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); 127 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
124 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 128 int dev_type = (gre_proto == htons(ETH_P_TEB) ||
129 gre_proto == htons(ETH_P_ERSPAN)) ?
125 ARPHRD_ETHER : ARPHRD_IP6GRE; 130 ARPHRD_ETHER : ARPHRD_IP6GRE;
126 int score, cand_score = 4; 131 int score, cand_score = 4;
127 132
@@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
226 if (cand) 231 if (cand)
227 return cand; 232 return cand;
228 233
234 t = rcu_dereference(ign->collect_md_tun);
235 if (t && t->dev->flags & IFF_UP)
236 return t;
237
229 dev = ign->fb_tunnel_dev; 238 dev = ign->fb_tunnel_dev;
230 if (dev->flags & IFF_UP) 239 if (dev->flags & IFF_UP)
231 return netdev_priv(dev); 240 return netdev_priv(dev);
@@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
261{ 270{
262 struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); 271 struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
263 272
273 if (t->parms.collect_md)
274 rcu_assign_pointer(ign->collect_md_tun, t);
275
264 rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 276 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
265 rcu_assign_pointer(*tp, t); 277 rcu_assign_pointer(*tp, t);
266} 278}
@@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
270 struct ip6_tnl __rcu **tp; 282 struct ip6_tnl __rcu **tp;
271 struct ip6_tnl *iter; 283 struct ip6_tnl *iter;
272 284
285 if (t->parms.collect_md)
286 rcu_assign_pointer(ign->collect_md_tun, NULL);
287
273 for (tp = ip6gre_bucket(ign, t); 288 for (tp = ip6gre_bucket(ign, t);
274 (iter = rtnl_dereference(*tp)) != NULL; 289 (iter = rtnl_dereference(*tp)) != NULL;
275 tp = &iter->next) { 290 tp = &iter->next) {
@@ -461,7 +476,94 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
461 &ipv6h->saddr, &ipv6h->daddr, tpi->key, 476 &ipv6h->saddr, &ipv6h->daddr, tpi->key,
462 tpi->proto); 477 tpi->proto);
463 if (tunnel) { 478 if (tunnel) {
464 ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); 479 if (tunnel->parms.collect_md) {
480 struct metadata_dst *tun_dst;
481 __be64 tun_id;
482 __be16 flags;
483
484 flags = tpi->flags;
485 tun_id = key32_to_tunnel_id(tpi->key);
486
487 tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
488 if (!tun_dst)
489 return PACKET_REJECT;
490
491 ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
492 } else {
493 ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
494 }
495
496 return PACKET_RCVD;
497 }
498
499 return PACKET_REJECT;
500}
501
502static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
503 struct tnl_ptk_info *tpi)
504{
505 struct erspan_base_hdr *ershdr;
506 struct erspan_metadata *pkt_md;
507 const struct ipv6hdr *ipv6h;
508 struct erspan_md2 *md2;
509 struct ip6_tnl *tunnel;
510 u8 ver;
511
512 if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
513 return PACKET_REJECT;
514
515 ipv6h = ipv6_hdr(skb);
516 ershdr = (struct erspan_base_hdr *)skb->data;
517 ver = ershdr->ver;
518 tpi->key = cpu_to_be32(get_session_id(ershdr));
519
520 tunnel = ip6gre_tunnel_lookup(skb->dev,
521 &ipv6h->saddr, &ipv6h->daddr, tpi->key,
522 tpi->proto);
523 if (tunnel) {
524 int len = erspan_hdr_len(ver);
525
526 if (unlikely(!pskb_may_pull(skb, len)))
527 return PACKET_REJECT;
528
529 ershdr = (struct erspan_base_hdr *)skb->data;
530 pkt_md = (struct erspan_metadata *)(ershdr + 1);
531
532 if (__iptunnel_pull_header(skb, len,
533 htons(ETH_P_TEB),
534 false, false) < 0)
535 return PACKET_REJECT;
536
537 if (tunnel->parms.collect_md) {
538 struct metadata_dst *tun_dst;
539 struct ip_tunnel_info *info;
540 struct erspan_metadata *md;
541 __be64 tun_id;
542 __be16 flags;
543
544 tpi->flags |= TUNNEL_KEY;
545 flags = tpi->flags;
546 tun_id = key32_to_tunnel_id(tpi->key);
547
548 tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
549 sizeof(*md));
550 if (!tun_dst)
551 return PACKET_REJECT;
552
553 info = &tun_dst->u.tun_info;
554 md = ip_tunnel_info_opts(info);
555 md->version = ver;
556 md2 = &md->u.md2;
557 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
558 ERSPAN_V2_MDSIZE);
559 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
560 info->options_len = sizeof(*md);
561
562 ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
563
564 } else {
565 ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
566 }
465 567
466 return PACKET_RCVD; 568 return PACKET_RCVD;
467 } 569 }
@@ -482,9 +584,17 @@ static int gre_rcv(struct sk_buff *skb)
482 if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) 584 if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
483 goto drop; 585 goto drop;
484 586
587 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
588 tpi.proto == htons(ETH_P_ERSPAN2))) {
589 if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
590 return 0;
591 goto out;
592 }
593
485 if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) 594 if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
486 return 0; 595 return 0;
487 596
597out:
488 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 598 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
489drop: 599drop:
490 kfree_skb(skb); 600 kfree_skb(skb);
@@ -497,6 +607,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
497 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 607 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
498} 608}
499 609
610static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
611 struct net_device *dev,
612 struct flowi6 *fl6, __u8 *dsfield,
613 int *encap_limit)
614{
615 const struct iphdr *iph = ip_hdr(skb);
616 struct ip6_tnl *t = netdev_priv(dev);
617
618 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
619 *encap_limit = t->parms.encap_limit;
620
621 memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
622
623 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
624 *dsfield = ipv4_get_dsfield(iph);
625 else
626 *dsfield = ip6_tclass(t->parms.flowinfo);
627
628 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
629 fl6->flowi6_mark = skb->mark;
630 else
631 fl6->flowi6_mark = t->parms.fwmark;
632
633 fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
634}
635
636static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
637 struct net_device *dev,
638 struct flowi6 *fl6, __u8 *dsfield,
639 int *encap_limit)
640{
641 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
642 struct ip6_tnl *t = netdev_priv(dev);
643 __u16 offset;
644
645 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
646 /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
647
648 if (offset > 0) {
649 struct ipv6_tlv_tnl_enc_lim *tel;
650
651 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
652 if (tel->encap_limit == 0) {
653 icmpv6_send(skb, ICMPV6_PARAMPROB,
654 ICMPV6_HDR_FIELD, offset + 2);
655 return -1;
656 }
657 *encap_limit = tel->encap_limit - 1;
658 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
659 *encap_limit = t->parms.encap_limit;
660 }
661
662 memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
663
664 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
665 *dsfield = ipv6_get_dsfield(ipv6h);
666 else
667 *dsfield = ip6_tclass(t->parms.flowinfo);
668
669 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
670 fl6->flowlabel |= ip6_flowlabel(ipv6h);
671
672 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
673 fl6->flowi6_mark = skb->mark;
674 else
675 fl6->flowi6_mark = t->parms.fwmark;
676
677 fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
678
679 return 0;
680}
681
500static netdev_tx_t __gre6_xmit(struct sk_buff *skb, 682static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
501 struct net_device *dev, __u8 dsfield, 683 struct net_device *dev, __u8 dsfield,
502 struct flowi6 *fl6, int encap_limit, 684 struct flowi6 *fl6, int encap_limit,
@@ -518,8 +700,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
518 700
519 /* Push GRE header. */ 701 /* Push GRE header. */
520 protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; 702 protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
521 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, 703
522 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); 704 if (tunnel->parms.collect_md) {
705 struct ip_tunnel_info *tun_info;
706 const struct ip_tunnel_key *key;
707 __be16 flags;
708
709 tun_info = skb_tunnel_info(skb);
710 if (unlikely(!tun_info ||
711 !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
712 ip_tunnel_info_af(tun_info) != AF_INET6))
713 return -EINVAL;
714
715 key = &tun_info->key;
716 memset(fl6, 0, sizeof(*fl6));
717 fl6->flowi6_proto = IPPROTO_GRE;
718 fl6->daddr = key->u.ipv6.dst;
719 fl6->flowlabel = key->label;
720 fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
721
722 dsfield = key->tos;
723 flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
724 tunnel->tun_hlen = gre_calc_hlen(flags);
725
726 gre_build_header(skb, tunnel->tun_hlen,
727 flags, protocol,
728 tunnel_id_to_key32(tun_info->key.tun_id), 0);
729
730 } else {
731 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
732 protocol, tunnel->parms.o_key,
733 htonl(tunnel->o_seqno));
734 }
523 735
524 return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, 736 return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
525 NEXTHDR_GRE); 737 NEXTHDR_GRE);
@@ -528,30 +740,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
528static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) 740static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
529{ 741{
530 struct ip6_tnl *t = netdev_priv(dev); 742 struct ip6_tnl *t = netdev_priv(dev);
531 const struct iphdr *iph = ip_hdr(skb);
532 int encap_limit = -1; 743 int encap_limit = -1;
533 struct flowi6 fl6; 744 struct flowi6 fl6;
534 __u8 dsfield; 745 __u8 dsfield = 0;
535 __u32 mtu; 746 __u32 mtu;
536 int err; 747 int err;
537 748
538 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 749 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
539 750
540 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 751 if (!t->parms.collect_md)
541 encap_limit = t->parms.encap_limit; 752 prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
542 753 &dsfield, &encap_limit);
543 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
544
545 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
546 dsfield = ipv4_get_dsfield(iph);
547 else
548 dsfield = ip6_tclass(t->parms.flowinfo);
549 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
550 fl6.flowi6_mark = skb->mark;
551 else
552 fl6.flowi6_mark = t->parms.fwmark;
553
554 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
555 754
556 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); 755 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
557 if (err) 756 if (err)
@@ -575,46 +774,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
575 struct ip6_tnl *t = netdev_priv(dev); 774 struct ip6_tnl *t = netdev_priv(dev);
576 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 775 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
577 int encap_limit = -1; 776 int encap_limit = -1;
578 __u16 offset;
579 struct flowi6 fl6; 777 struct flowi6 fl6;
580 __u8 dsfield; 778 __u8 dsfield = 0;
581 __u32 mtu; 779 __u32 mtu;
582 int err; 780 int err;
583 781
584 if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) 782 if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
585 return -1; 783 return -1;
586 784
587 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); 785 if (!t->parms.collect_md &&
588 /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ 786 prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
589 ipv6h = ipv6_hdr(skb); 787 return -1;
590
591 if (offset > 0) {
592 struct ipv6_tlv_tnl_enc_lim *tel;
593 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
594 if (tel->encap_limit == 0) {
595 icmpv6_send(skb, ICMPV6_PARAMPROB,
596 ICMPV6_HDR_FIELD, offset + 2);
597 return -1;
598 }
599 encap_limit = tel->encap_limit - 1;
600 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
601 encap_limit = t->parms.encap_limit;
602
603 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
604
605 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
606 dsfield = ipv6_get_dsfield(ipv6h);
607 else
608 dsfield = ip6_tclass(t->parms.flowinfo);
609
610 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
611 fl6.flowlabel |= ip6_flowlabel(ipv6h);
612 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
613 fl6.flowi6_mark = skb->mark;
614 else
615 fl6.flowi6_mark = t->parms.fwmark;
616
617 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
618 788
619 if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) 789 if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
620 return -1; 790 return -1;
@@ -661,7 +831,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
661 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 831 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
662 encap_limit = t->parms.encap_limit; 832 encap_limit = t->parms.encap_limit;
663 833
664 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); 834 if (!t->parms.collect_md)
835 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
665 836
666 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); 837 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
667 if (err) 838 if (err)
@@ -706,6 +877,137 @@ tx_err:
706 return NETDEV_TX_OK; 877 return NETDEV_TX_OK;
707} 878}
708 879
880static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
881 struct net_device *dev)
882{
883 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
884 struct ip6_tnl *t = netdev_priv(dev);
885 struct dst_entry *dst = skb_dst(skb);
886 struct net_device_stats *stats;
887 bool truncate = false;
888 int encap_limit = -1;
 889 __u8 dsfield = 0;
890 struct flowi6 fl6;
891 int err = -EINVAL;
892 __u32 mtu;
893
894 if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
895 goto tx_err;
896
897 if (gre_handle_offloads(skb, false))
898 goto tx_err;
899
900 if (skb->len > dev->mtu + dev->hard_header_len) {
901 pskb_trim(skb, dev->mtu + dev->hard_header_len);
902 truncate = true;
903 }
904
905 t->parms.o_flags &= ~TUNNEL_KEY;
906 IPCB(skb)->flags = 0;
907
908 /* For collect_md mode, derive fl6 from the tunnel key,
909 * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
910 */
911 if (t->parms.collect_md) {
912 struct ip_tunnel_info *tun_info;
913 const struct ip_tunnel_key *key;
914 struct erspan_metadata *md;
915 __be32 tun_id;
916
917 tun_info = skb_tunnel_info(skb);
918 if (unlikely(!tun_info ||
919 !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
920 ip_tunnel_info_af(tun_info) != AF_INET6))
921 return -EINVAL;
922
923 key = &tun_info->key;
924 memset(&fl6, 0, sizeof(fl6));
925 fl6.flowi6_proto = IPPROTO_GRE;
926 fl6.daddr = key->u.ipv6.dst;
927 fl6.flowlabel = key->label;
928 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
929
930 dsfield = key->tos;
931 md = ip_tunnel_info_opts(tun_info);
932 if (!md)
933 goto tx_err;
934
935 tun_id = tunnel_id_to_key32(key->tun_id);
936 if (md->version == 1) {
937 erspan_build_header(skb,
938 ntohl(tun_id),
939 ntohl(md->u.index), truncate,
940 false);
941 } else if (md->version == 2) {
942 erspan_build_header_v2(skb,
943 ntohl(tun_id),
944 md->u.md2.dir,
945 get_hwid(&md->u.md2),
946 truncate, false);
947 }
948 } else {
949 switch (skb->protocol) {
950 case htons(ETH_P_IP):
951 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
952 prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
953 &dsfield, &encap_limit);
954 break;
955 case htons(ETH_P_IPV6):
956 if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
957 goto tx_err;
958 if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
959 &dsfield, &encap_limit))
960 goto tx_err;
961 break;
962 default:
963 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
964 break;
965 }
966
967 if (t->parms.erspan_ver == 1)
968 erspan_build_header(skb, ntohl(t->parms.o_key),
969 t->parms.index,
970 truncate, false);
971 else
972 erspan_build_header_v2(skb, ntohl(t->parms.o_key),
973 t->parms.dir,
974 t->parms.hwid,
975 truncate, false);
976 fl6.daddr = t->parms.raddr;
977 }
978
979 /* Push GRE header. */
980 gre_build_header(skb, 8, TUNNEL_SEQ,
981 htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
982
983 /* TooBig packet may have updated dst->dev's mtu */
984 if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
985 dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
986
987 err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
988 NEXTHDR_GRE);
989 if (err != 0) {
990 /* XXX: send ICMP error even if DF is not set. */
991 if (err == -EMSGSIZE) {
992 if (skb->protocol == htons(ETH_P_IP))
993 icmp_send(skb, ICMP_DEST_UNREACH,
994 ICMP_FRAG_NEEDED, htonl(mtu));
995 else
996 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
997 }
998
999 goto tx_err;
1000 }
1001 return NETDEV_TX_OK;
1002
1003tx_err:
1004 stats = &t->dev->stats;
1005 stats->tx_errors++;
1006 stats->tx_dropped++;
1007 kfree_skb(skb);
1008 return NETDEV_TX_OK;
1009}
1010
709static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) 1011static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
710{ 1012{
711 struct net_device *dev = t->dev; 1013 struct net_device *dev = t->dev;
@@ -1079,6 +1381,10 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1079 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1381 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1080 dev->mtu -= 8; 1382 dev->mtu -= 8;
1081 1383
1384 if (tunnel->parms.collect_md) {
1385 dev->features |= NETIF_F_NETNS_LOCAL;
1386 netif_keep_dst(dev);
1387 }
1082 ip6gre_tnl_init_features(dev); 1388 ip6gre_tnl_init_features(dev);
1083 1389
1084 return 0; 1390 return 0;
@@ -1095,6 +1401,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
1095 1401
1096 tunnel = netdev_priv(dev); 1402 tunnel = netdev_priv(dev);
1097 1403
1404 if (tunnel->parms.collect_md)
1405 return 0;
1406
1098 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); 1407 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
1099 memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); 1408 memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
1100 1409
@@ -1117,7 +1426,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
1117 dev_hold(dev); 1426 dev_hold(dev);
1118} 1427}
1119 1428
1120
1121static struct inet6_protocol ip6gre_protocol __read_mostly = { 1429static struct inet6_protocol ip6gre_protocol __read_mostly = {
1122 .handler = gre_rcv, 1430 .handler = gre_rcv,
1123 .err_handler = ip6gre_err, 1431 .err_handler = ip6gre_err,
@@ -1132,7 +1440,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
1132 1440
1133 for_each_netdev_safe(net, dev, aux) 1441 for_each_netdev_safe(net, dev, aux)
1134 if (dev->rtnl_link_ops == &ip6gre_link_ops || 1442 if (dev->rtnl_link_ops == &ip6gre_link_ops ||
1135	 dev->rtnl_link_ops == &ip6gre_tap_ops)
1443	 dev->rtnl_link_ops == &ip6gre_tap_ops ||
1444 dev->rtnl_link_ops == &ip6erspan_tap_ops)
1136 unregister_netdevice_queue(dev, head); 1445 unregister_netdevice_queue(dev, head);
1137 1446
1138 for (prio = 0; prio < 4; prio++) { 1447 for (prio = 0; prio < 4; prio++) {
@@ -1254,6 +1563,70 @@ out:
1254 return ip6gre_tunnel_validate(tb, data, extack); 1563 return ip6gre_tunnel_validate(tb, data, extack);
1255} 1564}
1256 1565
1566static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1567 struct netlink_ext_ack *extack)
1568{
1569 __be16 flags = 0;
1570 int ret, ver = 0;
1571
1572 if (!data)
1573 return 0;
1574
1575 ret = ip6gre_tap_validate(tb, data, extack);
1576 if (ret)
1577 return ret;
1578
1579 /* ERSPAN should only have GRE sequence and key flag */
1580 if (data[IFLA_GRE_OFLAGS])
1581 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1582 if (data[IFLA_GRE_IFLAGS])
1583 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1584 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1585 flags != (GRE_SEQ | GRE_KEY))
1586 return -EINVAL;
1587
1588	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
1589	 * 32-bit key field as the ID, check its range.
1590 */
1591 if (data[IFLA_GRE_IKEY] &&
1592 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1593 return -EINVAL;
1594
1595 if (data[IFLA_GRE_OKEY] &&
1596 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1597 return -EINVAL;
1598
1599 if (data[IFLA_GRE_ERSPAN_VER]) {
1600 ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1601 if (ver != 1 && ver != 2)
1602 return -EINVAL;
1603 }
1604
1605 if (ver == 1) {
1606 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1607 u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1608
1609 if (index & ~INDEX_MASK)
1610 return -EINVAL;
1611 }
1612 } else if (ver == 2) {
1613 if (data[IFLA_GRE_ERSPAN_DIR]) {
1614 u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1615
1616 if (dir & ~(DIR_MASK >> DIR_OFFSET))
1617 return -EINVAL;
1618 }
1619
1620 if (data[IFLA_GRE_ERSPAN_HWID]) {
1621 u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1622
1623 if (hwid & ~(HWID_MASK >> HWID_OFFSET))
1624 return -EINVAL;
1625 }
1626 }
1627
1628 return 0;
1629}
1257 1630
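The version-2 checks above compare netlink attributes against shifted header masks. A compilable sketch of the same arithmetic, assuming the 1-bit dir / 6-bit hwid layout implied by DIR_MASK and HWID_MASK (mask values copied from include/net/erspan.h; v2_fields_ok() is illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define DIR_MASK    0x0008
#define DIR_OFFSET  3
#define HWID_MASK   0x03f0
#define HWID_OFFSET 4

static bool v2_fields_ok(uint16_t dir, uint16_t hwid)
{
	if (dir & ~(DIR_MASK >> DIR_OFFSET))     /* dir is a single bit */
		return false;
	if (hwid & ~(HWID_MASK >> HWID_OFFSET))  /* hwid is 6 bits */
		return false;
	return true;
}

int main(void)
{
	printf("%d %d\n", v2_fields_ok(1, 63), v2_fields_ok(2, 64)); /* 1 0 */
	return 0;
}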
1258static void ip6gre_netlink_parms(struct nlattr *data[], 1631static void ip6gre_netlink_parms(struct nlattr *data[],
1259 struct __ip6_tnl_parm *parms) 1632 struct __ip6_tnl_parm *parms)
@@ -1300,6 +1673,22 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
1300 1673
1301 if (data[IFLA_GRE_FWMARK]) 1674 if (data[IFLA_GRE_FWMARK])
1302 parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); 1675 parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1676
1677 if (data[IFLA_GRE_COLLECT_METADATA])
1678 parms->collect_md = true;
1679
1680 if (data[IFLA_GRE_ERSPAN_VER])
1681 parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1682
1683 if (parms->erspan_ver == 1) {
1684 if (data[IFLA_GRE_ERSPAN_INDEX])
1685 parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1686 } else if (parms->erspan_ver == 2) {
1687 if (data[IFLA_GRE_ERSPAN_DIR])
1688 parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1689 if (data[IFLA_GRE_ERSPAN_HWID])
1690 parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1691 }
1303} 1692}
1304 1693
1305static int ip6gre_tap_init(struct net_device *dev) 1694static int ip6gre_tap_init(struct net_device *dev)
@@ -1326,6 +1715,59 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
1326 .ndo_get_iflink = ip6_tnl_get_iflink, 1715 .ndo_get_iflink = ip6_tnl_get_iflink,
1327}; 1716};
1328 1717
1718static int ip6erspan_tap_init(struct net_device *dev)
1719{
1720 struct ip6_tnl *tunnel;
1721 int t_hlen;
1722 int ret;
1723
1724 tunnel = netdev_priv(dev);
1725
1726 tunnel->dev = dev;
1727 tunnel->net = dev_net(dev);
1728 strcpy(tunnel->parms.name, dev->name);
1729
1730 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1731 if (!dev->tstats)
1732 return -ENOMEM;
1733
1734 ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1735 if (ret) {
1736 free_percpu(dev->tstats);
1737 dev->tstats = NULL;
1738 return ret;
1739 }
1740
1741 tunnel->tun_hlen = 8;
1742 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1743 erspan_hdr_len(tunnel->parms.erspan_ver);
1744 t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
1745
1746 dev->hard_header_len = LL_MAX_HEADER + t_hlen;
1747 dev->mtu = ETH_DATA_LEN - t_hlen;
1748 if (dev->type == ARPHRD_ETHER)
1749 dev->mtu -= ETH_HLEN;
1750 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1751 dev->mtu -= 8;
1752
1753 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1754 tunnel = netdev_priv(dev);
1755 ip6gre_tnl_link_config(tunnel, 1);
1756
1757 return 0;
1758}
1759
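ip6erspan_tap_init() derives the device MTU by subtracting every header the tunnel will prepend. A back-of-envelope version of that arithmetic, assuming GRE base+sequence = 8 bytes, ERSPAN v1 = 8 / v2 = 12 bytes, IPv6 = 40 bytes, and 8 more for the encap-limit option (a sketch of the sums above, not the kernel code):

#include <stdio.h>

int main(void)
{
	int erspan_ver = 1;
	int tun_hlen   = 8;                          /* GRE hdr + sequence */
	int erspan_len = (erspan_ver == 1) ? 8 : 12;
	int t_hlen     = tun_hlen + erspan_len + 40; /* + sizeof(ipv6hdr) */
	int mtu        = 1500 - t_hlen;              /* ETH_DATA_LEN - t_hlen */

	mtu -= 14; /* ARPHRD_ETHER: inner Ethernet header (ETH_HLEN) */
	mtu -= 8;  /* tunnel encap-limit destination option */

	printf("ip6erspan v%d mtu ~ %d\n", erspan_ver, mtu); /* ~1422 for v1 */
	return 0;
}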
1760static const struct net_device_ops ip6erspan_netdev_ops = {
1761 .ndo_init = ip6erspan_tap_init,
1762 .ndo_uninit = ip6gre_tunnel_uninit,
1763 .ndo_start_xmit = ip6erspan_tunnel_xmit,
1764 .ndo_set_mac_address = eth_mac_addr,
1765 .ndo_validate_addr = eth_validate_addr,
1766 .ndo_change_mtu = ip6_tnl_change_mtu,
1767 .ndo_get_stats64 = ip_tunnel_get_stats64,
1768 .ndo_get_iflink = ip6_tnl_get_iflink,
1769};
1770
1329static void ip6gre_tap_setup(struct net_device *dev) 1771static void ip6gre_tap_setup(struct net_device *dev)
1330{ 1772{
1331 1773
@@ -1396,8 +1838,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1396 1838
1397 ip6gre_netlink_parms(data, &nt->parms); 1839 ip6gre_netlink_parms(data, &nt->parms);
1398 1840
1399	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
1400	 return -EEXIST;
1841	if (nt->parms.collect_md) {
1842	 if (rtnl_dereference(ign->collect_md_tun))
1843 return -EEXIST;
1844 } else {
1845 if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
1846 return -EEXIST;
1847 }
1401 1848
1402 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1849 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1403 eth_hw_addr_random(dev); 1850 eth_hw_addr_random(dev);
@@ -1500,8 +1947,12 @@ static size_t ip6gre_get_size(const struct net_device *dev)
1500 nla_total_size(2) + 1947 nla_total_size(2) +
1501 /* IFLA_GRE_ENCAP_DPORT */ 1948 /* IFLA_GRE_ENCAP_DPORT */
1502 nla_total_size(2) + 1949 nla_total_size(2) +
1950 /* IFLA_GRE_COLLECT_METADATA */
1951 nla_total_size(0) +
1503 /* IFLA_GRE_FWMARK */ 1952 /* IFLA_GRE_FWMARK */
1504 nla_total_size(4) + 1953 nla_total_size(4) +
1954 /* IFLA_GRE_ERSPAN_INDEX */
1955 nla_total_size(4) +
1505 0; 1956 0;
1506} 1957}
1507 1958
@@ -1523,7 +1974,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1523 nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || 1974 nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
1524 nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || 1975 nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
1525 nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || 1976 nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
1526	 nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
1977	 nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
1978 nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
1527 goto nla_put_failure; 1979 goto nla_put_failure;
1528 1980
1529 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, 1981 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1536,6 +1988,24 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1536 t->encap.flags)) 1988 t->encap.flags))
1537 goto nla_put_failure; 1989 goto nla_put_failure;
1538 1990
1991 if (p->collect_md) {
1992 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1993 goto nla_put_failure;
1994 }
1995
1996 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
1997 goto nla_put_failure;
1998
1999 if (p->erspan_ver == 1) {
2000 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
2001 goto nla_put_failure;
2002 } else if (p->erspan_ver == 2) {
2003 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
2004 goto nla_put_failure;
2005 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
2006 goto nla_put_failure;
2007 }
2008
1539 return 0; 2009 return 0;
1540 2010
1541nla_put_failure: 2011nla_put_failure:
@@ -1558,9 +2028,28 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
1558 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, 2028 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1559 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, 2029 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1560 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, 2030 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2031 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1561 [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, 2032 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
2033 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
2034 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
2035 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
2036 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
1562}; 2037};
1563 2038
2039static void ip6erspan_tap_setup(struct net_device *dev)
2040{
2041 ether_setup(dev);
2042
2043 dev->netdev_ops = &ip6erspan_netdev_ops;
2044 dev->needs_free_netdev = true;
2045 dev->priv_destructor = ip6gre_dev_free;
2046
2047 dev->features |= NETIF_F_NETNS_LOCAL;
2048 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
2049 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
2050 netif_keep_dst(dev);
2051}
2052
1564static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { 2053static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
1565 .kind = "ip6gre", 2054 .kind = "ip6gre",
1566 .maxtype = IFLA_GRE_MAX, 2055 .maxtype = IFLA_GRE_MAX,
@@ -1590,6 +2079,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
1590 .get_link_net = ip6_tnl_get_link_net, 2079 .get_link_net = ip6_tnl_get_link_net,
1591}; 2080};
1592 2081
2082static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
2083 .kind = "ip6erspan",
2084 .maxtype = IFLA_GRE_MAX,
2085 .policy = ip6gre_policy,
2086 .priv_size = sizeof(struct ip6_tnl),
2087 .setup = ip6erspan_tap_setup,
2088 .validate = ip6erspan_tap_validate,
2089 .newlink = ip6gre_newlink,
2090 .changelink = ip6gre_changelink,
2091 .get_size = ip6gre_get_size,
2092 .fill_info = ip6gre_fill_info,
2093 .get_link_net = ip6_tnl_get_link_net,
2094};
2095
1593/* 2096/*
1594 * And now the modules code and kernel interface. 2097 * And now the modules code and kernel interface.
1595 */ 2098 */
@@ -1618,9 +2121,15 @@ static int __init ip6gre_init(void)
1618 if (err < 0) 2121 if (err < 0)
1619 goto tap_ops_failed; 2122 goto tap_ops_failed;
1620 2123
2124 err = rtnl_link_register(&ip6erspan_tap_ops);
2125 if (err < 0)
2126 goto erspan_link_failed;
2127
1621out: 2128out:
1622 return err; 2129 return err;
1623 2130
2131erspan_link_failed:
2132 rtnl_link_unregister(&ip6gre_tap_ops);
1624tap_ops_failed: 2133tap_ops_failed:
1625 rtnl_link_unregister(&ip6gre_link_ops); 2134 rtnl_link_unregister(&ip6gre_link_ops);
1626rtnl_link_failed: 2135rtnl_link_failed:
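The error path added here extends the usual goto-unwind ladder: each successful registration gains a label that later failures fall through, so teardown runs in exact reverse order of setup. A self-contained sketch of the pattern (register_a/b/c and the labels are stand-ins, not kernel APIs):

#include <stdio.h>

static int register_a(void) { return 0; }
static int register_b(void) { return 0; }
static int register_c(void) { return -1; } /* force a failure */
static void unregister_a(void) { puts("undo a"); }
static void unregister_b(void) { puts("undo b"); }

static int init(void)
{
	int err;

	err = register_a();
	if (err)
		goto out;
	err = register_b();
	if (err)
		goto a_failed;
	err = register_c();
	if (err)
		goto b_failed;     /* mirrors erspan_link_failed above */
out:
	return err;

b_failed:
	unregister_b();            /* fall through: undo in reverse order */
a_failed:
	unregister_a();
	return err;
}

int main(void) { return init() ? 1 : 0; }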
@@ -1634,6 +2143,7 @@ static void __exit ip6gre_fini(void)
1634{ 2143{
1635 rtnl_link_unregister(&ip6gre_tap_ops); 2144 rtnl_link_unregister(&ip6gre_tap_ops);
1636 rtnl_link_unregister(&ip6gre_link_ops); 2145 rtnl_link_unregister(&ip6gre_link_ops);
2146 rtnl_link_unregister(&ip6erspan_tap_ops);
1637 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); 2147 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1638 unregister_pernet_device(&ip6gre_net_ops); 2148 unregister_pernet_device(&ip6gre_net_ops);
1639} 2149}
@@ -1645,4 +2155,5 @@ MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
1645MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); 2155MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
1646MODULE_ALIAS_RTNL_LINK("ip6gre"); 2156MODULE_ALIAS_RTNL_LINK("ip6gre");
1647MODULE_ALIAS_RTNL_LINK("ip6gretap"); 2157MODULE_ALIAS_RTNL_LINK("ip6gretap");
2158MODULE_ALIAS_RTNL_LINK("ip6erspan");
1648MODULE_ALIAS_NETDEV("ip6gre0"); 2159MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3763dc01e374..997c7f19ad62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -138,6 +138,14 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
138 return ret; 138 return ret;
139 } 139 }
140 140
141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
146 }
147#endif
148
141 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
142 dst_allfrag(skb_dst(skb)) || 150 dst_allfrag(skb_dst(skb)) ||
143 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -370,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
370 return dst_output(net, sk, skb); 378 return dst_output(net, sk, skb);
371} 379}
372 380
373static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
381unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
374{ 382{
375 unsigned int mtu; 383 unsigned int mtu;
376 struct inet6_dev *idev; 384 struct inet6_dev *idev;
@@ -390,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
390 398
391 return mtu; 399 return mtu;
392} 400}
401EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
393 402
394static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 403static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
395{ 404{
@@ -1209,7 +1218,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1209 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 1218 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1210 else 1219 else
1211 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1220 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1212	 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
1221	 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1213 if (np->frag_size < mtu) { 1222 if (np->frag_size < mtu) {
1214 if (np->frag_size) 1223 if (np->frag_size)
1215 mtu = np->frag_size; 1224 mtu = np->frag_size;
@@ -1217,7 +1226,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1217 if (mtu < IPV6_MIN_MTU) 1226 if (mtu < IPV6_MIN_MTU)
1218 return -EINVAL; 1227 return -EINVAL;
1219 cork->base.fragsize = mtu; 1228 cork->base.fragsize = mtu;
1220	if (dst_allfrag(rt->dst.path))
1229	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1221 cork->base.flags |= IPCORK_ALLFRAG; 1230 cork->base.flags |= IPCORK_ALLFRAG;
1222 cork->base.length = 0; 1231 cork->base.length = 0;
1223 1232
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1ee5584c3555..4b15fe928278 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -860,7 +860,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
860 struct metadata_dst *tun_dst, 860 struct metadata_dst *tun_dst,
861 bool log_ecn_err) 861 bool log_ecn_err)
862{ 862{
863	return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
863	return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
864 log_ecn_err); 864 log_ecn_err);
865} 865}
866EXPORT_SYMBOL(ip6_tnl_rcv); 866EXPORT_SYMBOL(ip6_tnl_rcv);
@@ -978,6 +978,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
978 int ret = 0; 978 int ret = 0;
979 struct net *net = t->net; 979 struct net *net = t->net;
980 980
981 if (t->parms.collect_md)
982 return 1;
983
981 if ((p->flags & IP6_TNL_F_CAP_XMIT) || 984 if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
982 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && 985 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
983 (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { 986 (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8c184f84f353..fa3ae1cb50d3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -626,6 +626,7 @@ static void vti6_link_config(struct ip6_tnl *t)
626{ 626{
627 struct net_device *dev = t->dev; 627 struct net_device *dev = t->dev;
628 struct __ip6_tnl_parm *p = &t->parms; 628 struct __ip6_tnl_parm *p = &t->parms;
629 struct net_device *tdev = NULL;
629 630
630 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 631 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
631 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); 632 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -638,6 +639,25 @@ static void vti6_link_config(struct ip6_tnl *t)
638 dev->flags |= IFF_POINTOPOINT; 639 dev->flags |= IFF_POINTOPOINT;
639 else 640 else
640 dev->flags &= ~IFF_POINTOPOINT; 641 dev->flags &= ~IFF_POINTOPOINT;
642
643 if (p->flags & IP6_TNL_F_CAP_XMIT) {
644 int strict = (ipv6_addr_type(&p->raddr) &
645 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
646 struct rt6_info *rt = rt6_lookup(t->net,
647 &p->raddr, &p->laddr,
648 p->link, strict);
649
650 if (rt)
651 tdev = rt->dst.dev;
652 ip6_rt_put(rt);
653 }
654
655 if (!tdev && p->link)
656 tdev = __dev_get_by_index(t->net, p->link);
657
658 if (tdev)
659 dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
660 IPV6_MIN_MTU);
641} 661}
642 662
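The vti6_link_config() addition sizes the device from the underlying route: take the lower device's MTU, subtract the tunnel's link-layer overhead, and never drop below the IPv6 minimum. A sketch under those assumptions (tdev_mtu and hard_header_len are plain inputs here rather than looked up via rt6_lookup()):

#include <stdio.h>

#define IPV6_MIN_MTU 1280

static int max_int(int a, int b) { return a > b ? a : b; }

static int vti6_mtu(int tdev_mtu, int hard_header_len)
{
	/* mirrors max_t(int, tdev->mtu - dev->hard_header_len, IPV6_MIN_MTU) */
	return max_int(tdev_mtu - hard_header_len, IPV6_MIN_MTU);
}

int main(void)
{
	printf("%d %d\n", vti6_mtu(1500, 0), vti6_mtu(1300, 100)); /* 1500 1280 */
	return 0;
}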
643/** 663/**
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a2e1a864eb46..9f6cace9c817 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -477,7 +477,6 @@ static int ip6mr_vif_open(struct inode *inode, struct file *file)
477} 477}
478 478
479static const struct file_operations ip6mr_vif_fops = { 479static const struct file_operations ip6mr_vif_fops = {
480 .owner = THIS_MODULE,
481 .open = ip6mr_vif_open, 480 .open = ip6mr_vif_open,
482 .read = seq_read, 481 .read = seq_read,
483 .llseek = seq_lseek, 482 .llseek = seq_lseek,
@@ -495,6 +494,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
495 return ERR_PTR(-ENOENT); 494 return ERR_PTR(-ENOENT);
496 495
497 it->mrt = mrt; 496 it->mrt = mrt;
497 it->cache = NULL;
498 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 498 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
499 : SEQ_START_TOKEN; 499 : SEQ_START_TOKEN;
500} 500}
@@ -609,7 +609,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file)
609} 609}
610 610
611static const struct file_operations ip6mr_mfc_fops = { 611static const struct file_operations ip6mr_mfc_fops = {
612 .owner = THIS_MODULE,
613 .open = ipmr_mfc_open, 612 .open = ipmr_mfc_open,
614 .read = seq_read, 613 .read = seq_read,
615 .llseek = seq_lseek, 614 .llseek = seq_lseek,
@@ -1425,10 +1424,13 @@ int __init ip6_mr_init(void)
1425 goto add_proto_fail; 1424 goto add_proto_fail;
1426 } 1425 }
1427#endif 1426#endif
1428	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1429	 ip6mr_rtm_dumproute, 0);
1430	return 0;
1427	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1428	 NULL, ip6mr_rtm_dumproute, 0);
1429	if (err == 0)
1430 return 0;
1431
1431#ifdef CONFIG_IPV6_PIMSM_V2 1432#ifdef CONFIG_IPV6_PIMSM_V2
1433 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1432add_proto_fail: 1434add_proto_fail:
1433 unregister_netdevice_notifier(&ip6_mr_notifier); 1435 unregister_netdevice_notifier(&ip6_mr_notifier);
1434#endif 1436#endif
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e8ffb5b5d84e..d78d41fc4b1a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -923,12 +923,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
923#ifdef CONFIG_NETFILTER 923#ifdef CONFIG_NETFILTER
924 /* we need to exclude all possible ENOPROTOOPTs except default case */ 924 /* we need to exclude all possible ENOPROTOOPTs except default case */
925 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && 925 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
926	 optname != IPV6_XFRM_POLICY) {
927	 lock_sock(sk);
928	 err = nf_setsockopt(sk, PF_INET6, optname, optval,
929	 optlen);
930	 release_sock(sk);
931	 }
926	 optname != IPV6_XFRM_POLICY)
927	 err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen);
932#endif 928#endif
933 return err; 929 return err;
934} 930}
@@ -958,12 +954,9 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
958#ifdef CONFIG_NETFILTER 954#ifdef CONFIG_NETFILTER
959 /* we need to exclude all possible ENOPROTOOPTs except default case */ 955 /* we need to exclude all possible ENOPROTOOPTs except default case */
960 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && 956 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
961	 optname != IPV6_XFRM_POLICY) {
962	 lock_sock(sk);
963	 err = compat_nf_setsockopt(sk, PF_INET6, optname,
964	 optval, optlen);
965	 release_sock(sk);
966	 }
957	 optname != IPV6_XFRM_POLICY)
958	 err = compat_nf_setsockopt(sk, PF_INET6, optname, optval,
959	 optlen);
967#endif 960#endif
968 return err; 961 return err;
969} 962}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 844642682b83..9b9d2ff01b35 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -65,10 +65,10 @@
65#include <net/ip6_checksum.h> 65#include <net/ip6_checksum.h>
66 66
67/* Ensure that we have struct in6_addr aligned on 32bit word. */ 67/* Ensure that we have struct in6_addr aligned on 32bit word. */
68static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
69	BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
70	BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4),
71	BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4)
68static int __mld2_query_bugs[] __attribute__((__unused__)) = {
69	BUILD_BUG_ON_ZERO(offsetof(struct mld2_query, mld2q_srcs) % 4),
70	BUILD_BUG_ON_ZERO(offsetof(struct mld2_report, mld2r_grec) % 4),
71	BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4)
72}; 72};
73 73
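The swap from BUILD_BUG_ON_NULL to BUILD_BUG_ON_ZERO matters because the dummy array now holds ints, and BUILD_BUG_ON_ZERO evaluates to integer 0 when the condition is false; when it is true, a negative-width bitfield breaks the build. A userspace rendition of the trick (GNU C; struct mld2q here is a stand-in for the real message layouts):

#include <stddef.h>

#define BUILD_BUG_ON_ZERO(e) ((int)sizeof(struct { int : (-!!(e)); }))

struct mld2q { char pad[4]; unsigned int srcs; };

static int __alignment_checks[] __attribute__((__unused__)) = {
	/* compiles only if 'srcs' sits on a 32-bit boundary */
	BUILD_BUG_ON_ZERO(offsetof(struct mld2q, srcs) % 4),
};

int main(void) { return 0; }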
74static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; 74static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
@@ -1655,8 +1655,6 @@ static void mld_sendpack(struct sk_buff *skb)
1655 if (err) 1655 if (err)
1656 goto err_out; 1656 goto err_out;
1657 1657
1658 payload_len = skb->len;
1659
1660 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 1658 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1661 net, net->ipv6.igmp_sk, skb, NULL, skb->dev, 1659 net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
1662 dst_output); 1660 dst_output);
@@ -2758,7 +2756,6 @@ static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
2758} 2756}
2759 2757
2760static const struct file_operations igmp6_mc_seq_fops = { 2758static const struct file_operations igmp6_mc_seq_fops = {
2761 .owner = THIS_MODULE,
2762 .open = igmp6_mc_seq_open, 2759 .open = igmp6_mc_seq_open,
2763 .read = seq_read, 2760 .read = seq_read,
2764 .llseek = seq_lseek, 2761 .llseek = seq_lseek,
@@ -2913,7 +2910,6 @@ static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
2913} 2910}
2914 2911
2915static const struct file_operations igmp6_mcf_seq_fops = { 2912static const struct file_operations igmp6_mcf_seq_fops = {
2916 .owner = THIS_MODULE,
2917 .open = igmp6_mcf_seq_open, 2913 .open = igmp6_mcf_seq_open,
2918 .read = seq_read, 2914 .read = seq_read,
2919 .llseek = seq_lseek, 2915 .llseek = seq_lseek,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3cea200c85e..f61a5b613b52 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -566,6 +566,11 @@ static void ndisc_send_unsol_na(struct net_device *dev)
566 566
567 read_lock_bh(&idev->lock); 567 read_lock_bh(&idev->lock);
568 list_for_each_entry(ifa, &idev->addr_list, if_list) { 568 list_for_each_entry(ifa, &idev->addr_list, if_list) {
569 /* skip tentative addresses until dad completes */
570 if (ifa->flags & IFA_F_TENTATIVE &&
571 !(ifa->flags & IFA_F_OPTIMISTIC))
572 continue;
573
569 ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, 574 ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
570 /*router=*/ !!idev->cnf.forwarding, 575 /*router=*/ !!idev->cnf.forwarding,
571 /*solicited=*/ false, /*override=*/ true, 576 /*solicited=*/ false, /*override=*/ true,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39970e212ad5..d95ceca7ff8f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
68} 68}
69EXPORT_SYMBOL(ip6_route_me_harder); 69EXPORT_SYMBOL(ip6_route_me_harder);
70 70
71/* 71static int nf_ip6_reroute(struct sk_buff *skb,
72 * Extra routing may needed on local out, as the QUEUE target never
73 * returns control to the table.
74 */
75
76struct ip6_rt_info {
77 struct in6_addr daddr;
78 struct in6_addr saddr;
79 u_int32_t mark;
80};
81
82static void nf_ip6_saveroute(const struct sk_buff *skb,
83 struct nf_queue_entry *entry)
84{
85 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
86
87 if (entry->state.hook == NF_INET_LOCAL_OUT) {
88 const struct ipv6hdr *iph = ipv6_hdr(skb);
89
90 rt_info->daddr = iph->daddr;
91 rt_info->saddr = iph->saddr;
92 rt_info->mark = skb->mark;
93 }
94}
95
96static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
97 const struct nf_queue_entry *entry) 72 const struct nf_queue_entry *entry)
98{ 73{
99 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); 74 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
103 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || 78 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
104 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || 79 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
105 skb->mark != rt_info->mark) 80 skb->mark != rt_info->mark)
106	 return ip6_route_me_harder(net, skb);
81	 return ip6_route_me_harder(entry->state.net, skb);
107 } 82 }
108 return 0; 83 return 0;
109} 84}
@@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
190}; 165};
191 166
192static const struct nf_ipv6_ops ipv6ops = { 167static const struct nf_ipv6_ops ipv6ops = {
193 .chk_addr = ipv6_chk_addr, 168 .chk_addr = ipv6_chk_addr,
194 .route_input = ip6_route_input, 169 .route_input = ip6_route_input,
195 .fragment = ip6_fragment 170 .fragment = ip6_fragment,
196};
197
198static const struct nf_afinfo nf_ip6_afinfo = {
199 .family = AF_INET6,
200 .checksum = nf_ip6_checksum, 171 .checksum = nf_ip6_checksum,
201 .checksum_partial = nf_ip6_checksum_partial, 172 .checksum_partial = nf_ip6_checksum_partial,
202 .route = nf_ip6_route, 173 .route = nf_ip6_route,
203 .saveroute = nf_ip6_saveroute,
204 .reroute = nf_ip6_reroute, 174 .reroute = nf_ip6_reroute,
205 .route_key_size = sizeof(struct ip6_rt_info),
206}; 175};
207 176
208int __init ipv6_netfilter_init(void) 177int __init ipv6_netfilter_init(void)
209{ 178{
210 RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); 179 RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
211 return nf_register_afinfo(&nf_ip6_afinfo); 180 return 0;
212} 181}
213 182
214/* This can be called from inet6_init() on errors, so it cannot 183/* This can be called from inet6_init() on errors, so it cannot
@@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void)
217void ipv6_netfilter_fini(void) 186void ipv6_netfilter_fini(void)
218{ 187{
219 RCU_INIT_POINTER(nf_ipv6_ops, NULL); 188 RCU_INIT_POINTER(nf_ipv6_ops, NULL);
220 nf_unregister_afinfo(&nf_ip6_afinfo);
221} 189}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..d395d1590699 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
71endif # NF_TABLES_IPV6 71endif # NF_TABLES_IPV6
72endif # NF_TABLES 72endif # NF_TABLES
73 73
74config NF_FLOW_TABLE_IPV6
75 tristate "Netfilter flow table IPv6 module"
76 depends on NF_FLOW_TABLE
77 help
78 This option adds the flow table IPv6 support.
79
80 To compile it as a module, choose M here.
81
74config NF_DUP_IPV6 82config NF_DUP_IPV6
75 tristate "Netfilter IPv6 packet duplication to alternate destination" 83 tristate "Netfilter IPv6 packet duplication to alternate destination"
76 depends on !NF_CONNTRACK || NF_CONNTRACK 84 depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -232,6 +240,15 @@ config IP6_NF_MATCH_RT
232 240
233 To compile it as a module, choose M here. If unsure, say N. 241 To compile it as a module, choose M here. If unsure, say N.
234 242
243config IP6_NF_MATCH_SRH
244 tristate '"srh" Segment Routing header match support'
245 depends on NETFILTER_ADVANCED
246 help
247 srh matching allows you to match packets based on the segment
248 routing header of the packet.
249
250 To compile it as a module, choose M here. If unsure, say N.
251
235# The targets 252# The targets
236config IP6_NF_TARGET_HL 253config IP6_NF_TARGET_HL
237 tristate '"HL" hoplimit target support' 254 tristate '"HL" hoplimit target support'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..d984057b8395 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
45obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o 45obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
46obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o 46obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
47 47
48# flow table support
49obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
50
48# matches 51# matches
49obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 52obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
50obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 53obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
@@ -54,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
54obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o 57obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
55obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o 58obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
56obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o 59obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
60obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
57 61
58# targets 62# targets
59obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o 63obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1d7ae9366335..af4c917e0836 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -282,12 +282,7 @@ ip6t_do_table(struct sk_buff *skb,
282 282
283 local_bh_disable(); 283 local_bh_disable();
284 addend = xt_write_recseq_begin(); 284 addend = xt_write_recseq_begin();
285	private = table->private;
285	private = READ_ONCE(table->private); /* Address dependency. */
286 /*
287 * Ensure we load private-> members after we've fetched the base
288 * pointer.
289 */
290 smp_read_barrier_depends();
291 cpu = smp_processor_id(); 286 cpu = smp_processor_id();
292 table_base = private->entries; 287 table_base = private->entries;
293 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; 288 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
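This hunk drops the explicit smp_read_barrier_depends() because READ_ONCE() now implies it; on every architecture except Alpha the address dependency from the pointer load to the ->entries dereference is ordering enough by itself. A userspace approximation of READ_ONCE (GNU C; the volatile access forces one untorn load and stops compiler refetching, but emits no hardware barrier):

#include <stdio.h>

#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

struct table_info { int *entries; };
static struct table_info *table_private;

static int first_entry(void)
{
	/* one real load of the pointer; later uses depend on it */
	struct table_info *priv = READ_ONCE(table_private);

	return priv ? priv->entries[0] : -1;
}

int main(void)
{
	static int entries[1] = { 42 };
	static struct table_info info = { entries };

	table_private = &info;
	printf("%d\n", first_entry());
	return 0;
}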
@@ -991,9 +986,8 @@ static int get_info(struct net *net, void __user *user,
991 if (compat) 986 if (compat)
992 xt_compat_lock(AF_INET6); 987 xt_compat_lock(AF_INET6);
993#endif 988#endif
994	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
995	 "ip6table_%s", name);
996	if (t) {
989	t = xt_request_find_table_lock(net, AF_INET6, name);
990	if (!IS_ERR(t)) {
997 struct ip6t_getinfo info; 991 struct ip6t_getinfo info;
998 const struct xt_table_info *private = t->private; 992 const struct xt_table_info *private = t->private;
999#ifdef CONFIG_COMPAT 993#ifdef CONFIG_COMPAT
@@ -1023,7 +1017,7 @@ static int get_info(struct net *net, void __user *user,
1023 xt_table_unlock(t); 1017 xt_table_unlock(t);
1024 module_put(t->me); 1018 module_put(t->me);
1025 } else 1019 } else
1026	 ret = -ENOENT;
1020	 ret = PTR_ERR(t);
1027#ifdef CONFIG_COMPAT 1021#ifdef CONFIG_COMPAT
1028 if (compat) 1022 if (compat)
1029 xt_compat_unlock(AF_INET6); 1023 xt_compat_unlock(AF_INET6);
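The -ENOENT/if (t) pattern gives way to the ERR_PTR convention throughout this file: an errno rides in the top page of pointer space, so one return value carries both a valid table pointer and a failure reason. Illustrative userspace helpers (MAX_ERRNO and find_table() are stand-ins; the kernel's versions live in include/linux/err.h):

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* errors occupy the last 4095 values of the address space */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *find_table(int exists)
{
	static int table = 1;

	return exists ? (void *)&table : ERR_PTR(-2 /* -ENOENT */);
}

int main(void)
{
	void *t = find_table(0);

	if (IS_ERR(t))
		printf("error %ld\n", PTR_ERR(t)); /* prints: error -2 */
	return 0;
}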
@@ -1049,7 +1043,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1049 get.name[sizeof(get.name) - 1] = '\0'; 1043 get.name[sizeof(get.name) - 1] = '\0';
1050 1044
1051 t = xt_find_table_lock(net, AF_INET6, get.name); 1045 t = xt_find_table_lock(net, AF_INET6, get.name);
1052	if (t) {
1046	if (!IS_ERR(t)) {
1053 struct xt_table_info *private = t->private; 1047 struct xt_table_info *private = t->private;
1054 if (get.size == private->size) 1048 if (get.size == private->size)
1055 ret = copy_entries_to_user(private->size, 1049 ret = copy_entries_to_user(private->size,
@@ -1060,7 +1054,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1060 module_put(t->me); 1054 module_put(t->me);
1061 xt_table_unlock(t); 1055 xt_table_unlock(t);
1062 } else 1056 } else
1063	 ret = -ENOENT;
1057	 ret = PTR_ERR(t);
1064 1058
1065 return ret; 1059 return ret;
1066} 1060}
@@ -1083,10 +1077,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1083 goto out; 1077 goto out;
1084 } 1078 }
1085 1079
1086	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
1087	 "ip6table_%s", name);
1088	if (!t) {
1089	 ret = -ENOENT;
1080	t = xt_request_find_table_lock(net, AF_INET6, name);
1081	if (IS_ERR(t)) {
1082	 ret = PTR_ERR(t);
1090 goto free_newinfo_counters_untrans; 1083 goto free_newinfo_counters_untrans;
1091 } 1084 }
1092 1085
@@ -1199,8 +1192,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1199 if (IS_ERR(paddc)) 1192 if (IS_ERR(paddc))
1200 return PTR_ERR(paddc); 1193 return PTR_ERR(paddc);
1201 t = xt_find_table_lock(net, AF_INET6, tmp.name); 1194 t = xt_find_table_lock(net, AF_INET6, tmp.name);
1202	if (!t) {
1203	 ret = -ENOENT;
1195	if (IS_ERR(t)) {
1196	 ret = PTR_ERR(t);
1204 goto free; 1197 goto free;
1205 } 1198 }
1206 1199
@@ -1636,7 +1629,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1636 1629
1637 xt_compat_lock(AF_INET6); 1630 xt_compat_lock(AF_INET6);
1638 t = xt_find_table_lock(net, AF_INET6, get.name); 1631 t = xt_find_table_lock(net, AF_INET6, get.name);
1639	if (t) {
1632	if (!IS_ERR(t)) {
1640 const struct xt_table_info *private = t->private; 1633 const struct xt_table_info *private = t->private;
1641 struct xt_table_info info; 1634 struct xt_table_info info;
1642 ret = compat_table_info(private, &info); 1635 ret = compat_table_info(private, &info);
@@ -1650,7 +1643,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1650 module_put(t->me); 1643 module_put(t->me);
1651 xt_table_unlock(t); 1644 xt_table_unlock(t);
1652 } else 1645 } else
1653	 ret = -ENOENT;
1646	 ret = PTR_ERR(t);
1654 1647
1655 xt_compat_unlock(AF_INET6); 1648 xt_compat_unlock(AF_INET6);
1656 return ret; 1649 return ret;
@@ -1954,7 +1947,6 @@ static int __init ip6_tables_init(void)
1954 if (ret < 0) 1947 if (ret < 0)
1955 goto err5; 1948 goto err5;
1956 1949
1957 pr_info("(C) 2000-2006 Netfilter Core Team\n");
1958 return 0; 1950 return 0;
1959 1951
1960err5: 1952err5:
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
new file mode 100644
index 000000000000..9642164107ce
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -0,0 +1,161 @@
1/* Kernel module to match Segment Routing Header (SRH) parameters. */
2
3/* Author:
4 * Ahmed Abdelsalam <amsalam20@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 */
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ipv6.h>
16#include <linux/types.h>
17#include <net/ipv6.h>
18#include <net/seg6.h>
19
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter_ipv6/ip6t_srh.h>
22#include <linux/netfilter_ipv6/ip6_tables.h>
23
24/* Test a struct->mt_invflags and a boolean for inequality */
25#define NF_SRH_INVF(ptr, flag, boolean) \
26 ((boolean) ^ !!((ptr)->mt_invflags & (flag)))
27
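NF_SRH_INVF implements xtables' optional-invert idiom: XOR the raw test result with the invert bit, so one expression serves both the match and its negation. A compilable truth-table demo (the struct and flag name are illustrative):

#include <stdio.h>

struct m { unsigned int mt_invflags; };

#define INV_NEXTHDR 0x01
#define NF_SRH_INVF(ptr, flag, boolean) \
	((boolean) ^ !!((ptr)->mt_invflags & (flag)))

int main(void)
{
	struct m plain = { 0 }, inverted = { INV_NEXTHDR };

	/* a mismatch (boolean = 1) rejects under a plain rule (prints 1)
	 * but passes under an inverted rule (prints 0) */
	printf("%d %d\n",
	       NF_SRH_INVF(&plain,    INV_NEXTHDR, 1),
	       NF_SRH_INVF(&inverted, INV_NEXTHDR, 1));
	return 0;
}

Setting the invert bit flips the verdict without duplicating any of the comparisons below.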
28static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
29{
30 const struct ip6t_srh *srhinfo = par->matchinfo;
31 struct ipv6_sr_hdr *srh;
32 struct ipv6_sr_hdr _srh;
33 int hdrlen, srhoff = 0;
34
35 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
36 return false;
37 srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
38 if (!srh)
39 return false;
40
41 hdrlen = ipv6_optlen(srh);
42 if (skb->len - srhoff < hdrlen)
43 return false;
44
45 if (srh->type != IPV6_SRCRT_TYPE_4)
46 return false;
47
48 if (srh->segments_left > srh->first_segment)
49 return false;
50
51 /* Next Header matching */
52 if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
53 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
54 !(srh->nexthdr == srhinfo->next_hdr)))
55 return false;
56
57 /* Header Extension Length matching */
58 if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
59 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
60 !(srh->hdrlen == srhinfo->hdr_len)))
61 return false;
62
63 if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
64 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
65 !(srh->hdrlen > srhinfo->hdr_len)))
66 return false;
67
68 if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
69 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
70 !(srh->hdrlen < srhinfo->hdr_len)))
71 return false;
72
73 /* Segments Left matching */
74 if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
75 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
76 !(srh->segments_left == srhinfo->segs_left)))
77 return false;
78
79 if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
80 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
81 !(srh->segments_left > srhinfo->segs_left)))
82 return false;
83
84 if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
85 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
86 !(srh->segments_left < srhinfo->segs_left)))
87 return false;
88
89 /**
90 * Last Entry matching
91 * Last_Entry field was introduced in revision 6 of the SRH draft.
92 * It was called First_Segment in the previous revision
93 */
94 if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
95 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
96 !(srh->first_segment == srhinfo->last_entry)))
97 return false;
98
99 if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
100 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
101 !(srh->first_segment > srhinfo->last_entry)))
102 return false;
103
104 if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
105 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
106 !(srh->first_segment < srhinfo->last_entry)))
107 return false;
108
109 /**
110	 * Tag matching
111 * Tag field was introduced in revision 6 of the SRH draft.
112 */
113 if (srhinfo->mt_flags & IP6T_SRH_TAG)
114 if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
115 !(srh->tag == srhinfo->tag)))
116 return false;
117 return true;
118}
119
120static int srh_mt6_check(const struct xt_mtchk_param *par)
121{
122 const struct ip6t_srh *srhinfo = par->matchinfo;
123
124 if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
125 pr_err("unknown srh match flags %X\n", srhinfo->mt_flags);
126 return -EINVAL;
127 }
128
129 if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
130 pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
131 return -EINVAL;
132 }
133
134 return 0;
135}
136
137static struct xt_match srh_mt6_reg __read_mostly = {
138 .name = "srh",
139 .family = NFPROTO_IPV6,
140 .match = srh_mt6,
141 .matchsize = sizeof(struct ip6t_srh),
142 .checkentry = srh_mt6_check,
143 .me = THIS_MODULE,
144};
145
146static int __init srh_mt6_init(void)
147{
148 return xt_register_match(&srh_mt6_reg);
149}
150
151static void __exit srh_mt6_exit(void)
152{
153 xt_unregister_match(&srh_mt6_reg);
154}
155
156module_init(srh_mt6_init);
157module_exit(srh_mt6_exit);
158
159MODULE_LICENSE("GPL");
160MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match");
161MODULE_AUTHOR("Ahmed Abdelsalam <amsalam20@gmail.com>");
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 2b1a9dcdbcb3..b0524b18c4fb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
42 u_int8_t hop_limit; 42 u_int8_t hop_limit;
43 u_int32_t flowlabel, mark; 43 u_int32_t flowlabel, mark;
44 int err; 44 int err;
45#if 0
46 /* root is playing with raw sockets. */
47 if (skb->len < sizeof(struct iphdr) ||
48 ip_hdrlen(skb) < sizeof(struct iphdr)) {
49 net_warn_ratelimited("ip6t_hook: happy cracking\n");
50 return NF_ACCEPT;
51 }
52#endif
53 45
54 /* save source/dest address, mark, hoplimit, flowlabel, priority, */ 46 /* save source/dest address, mark, hoplimit, flowlabel, priority, */
55 memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); 47 memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 991512576c8c..47306e45a80a 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
74 { 74 {
75 .hook = ip6table_nat_in, 75 .hook = ip6table_nat_in,
76 .pf = NFPROTO_IPV6, 76 .pf = NFPROTO_IPV6,
77 .nat_hook = true,
77 .hooknum = NF_INET_PRE_ROUTING, 78 .hooknum = NF_INET_PRE_ROUTING,
78 .priority = NF_IP6_PRI_NAT_DST, 79 .priority = NF_IP6_PRI_NAT_DST,
79 }, 80 },
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
81 { 82 {
82 .hook = ip6table_nat_out, 83 .hook = ip6table_nat_out,
83 .pf = NFPROTO_IPV6, 84 .pf = NFPROTO_IPV6,
85 .nat_hook = true,
84 .hooknum = NF_INET_POST_ROUTING, 86 .hooknum = NF_INET_POST_ROUTING,
85 .priority = NF_IP6_PRI_NAT_SRC, 87 .priority = NF_IP6_PRI_NAT_SRC,
86 }, 88 },
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
88 { 90 {
89 .hook = ip6table_nat_local_fn, 91 .hook = ip6table_nat_local_fn,
90 .pf = NFPROTO_IPV6, 92 .pf = NFPROTO_IPV6,
93 .nat_hook = true,
91 .hooknum = NF_INET_LOCAL_OUT, 94 .hooknum = NF_INET_LOCAL_OUT,
92 .priority = NF_IP6_PRI_NAT_DST, 95 .priority = NF_IP6_PRI_NAT_DST,
93 }, 96 },
94 /* After packet filtering, change source */ 97 /* After packet filtering, change source */
95 { 98 {
96 .hook = ip6table_nat_fn, 99 .hook = ip6table_nat_fn,
100 .nat_hook = true,
97 .pf = NFPROTO_IPV6, 101 .pf = NFPROTO_IPV6,
98 .hooknum = NF_INET_LOCAL_IN, 102 .hooknum = NF_INET_LOCAL_IN,
99 .priority = NF_IP6_PRI_NAT_SRC, 103 .priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index d4bc56443dc1..710fa0806c37 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 */ 5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6#include <linux/module.h> 7#include <linux/module.h>
7#include <linux/netfilter_ipv6/ip6_tables.h> 8#include <linux/netfilter_ipv6/ip6_tables.h>
8#include <linux/slab.h> 9#include <linux/slab.h>
@@ -11,6 +12,10 @@
11 12
12static int __net_init ip6table_raw_table_init(struct net *net); 13static int __net_init ip6table_raw_table_init(struct net *net);
13 14
15static bool raw_before_defrag __read_mostly;
16MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
17module_param(raw_before_defrag, bool, 0000);
18
14static const struct xt_table packet_raw = { 19static const struct xt_table packet_raw = {
15 .name = "raw", 20 .name = "raw",
16 .valid_hooks = RAW_VALID_HOOKS, 21 .valid_hooks = RAW_VALID_HOOKS,
@@ -20,6 +25,15 @@ static const struct xt_table packet_raw = {
20 .table_init = ip6table_raw_table_init, 25 .table_init = ip6table_raw_table_init,
21}; 26};
22 27
28static const struct xt_table packet_raw_before_defrag = {
29 .name = "raw",
30 .valid_hooks = RAW_VALID_HOOKS,
31 .me = THIS_MODULE,
32 .af = NFPROTO_IPV6,
33 .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
34 .table_init = ip6table_raw_table_init,
35};
36
23/* The work comes in here from netfilter.c. */ 37/* The work comes in here from netfilter.c. */
24static unsigned int 38static unsigned int
25ip6table_raw_hook(void *priv, struct sk_buff *skb, 39ip6table_raw_hook(void *priv, struct sk_buff *skb,
@@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
33static int __net_init ip6table_raw_table_init(struct net *net) 47static int __net_init ip6table_raw_table_init(struct net *net)
34{ 48{
35 struct ip6t_replace *repl; 49 struct ip6t_replace *repl;
50 const struct xt_table *table = &packet_raw;
36 int ret; 51 int ret;
37 52
53 if (raw_before_defrag)
54 table = &packet_raw_before_defrag;
55
38 if (net->ipv6.ip6table_raw) 56 if (net->ipv6.ip6table_raw)
39 return 0; 57 return 0;
40 58
41	repl = ip6t_alloc_initial_table(&packet_raw);
59	repl = ip6t_alloc_initial_table(table);
42 if (repl == NULL) 60 if (repl == NULL)
43 return -ENOMEM; 61 return -ENOMEM;
44	ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
62	ret = ip6t_register_table(net, table, repl, rawtable_ops,
45 &net->ipv6.ip6table_raw); 63 &net->ipv6.ip6table_raw);
46 kfree(repl); 64 kfree(repl);
47 return ret; 65 return ret;
@@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = {
62static int __init ip6table_raw_init(void) 80static int __init ip6table_raw_init(void)
63{ 81{
64 int ret; 82 int ret;
83 const struct xt_table *table = &packet_raw;
84
85 if (raw_before_defrag) {
86 table = &packet_raw_before_defrag;
87
88 pr_info("Enabling raw table before defrag\n");
89 }
65 90
66 /* Register hooks */ 91 /* Register hooks */
67	rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
92	rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
68 if (IS_ERR(rawtable_ops)) 93 if (IS_ERR(rawtable_ops))
69 return PTR_ERR(rawtable_ops); 94 return PTR_ERR(rawtable_ops);
70 95
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3b80a38f62b8..663827ee3cf8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv,
176 struct sk_buff *skb, 176 struct sk_buff *skb,
177 const struct nf_hook_state *state) 177 const struct nf_hook_state *state)
178{ 178{
179 /* root is playing with raw sockets. */
180 if (skb->len < sizeof(struct ipv6hdr)) {
181 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
182 return NF_ACCEPT;
183 }
184 return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 179 return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
185} 180}
186 181
@@ -226,20 +221,27 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = {
226static int 221static int
227ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 222ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
228{ 223{
229 const struct inet_sock *inet = inet_sk(sk); 224 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
230 const struct ipv6_pinfo *inet6 = inet6_sk(sk); 225 const struct ipv6_pinfo *inet6 = inet6_sk(sk);
226 const struct inet_sock *inet = inet_sk(sk);
231 const struct nf_conntrack_tuple_hash *h; 227 const struct nf_conntrack_tuple_hash *h;
232 struct sockaddr_in6 sin6; 228 struct sockaddr_in6 sin6;
233 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
234 struct nf_conn *ct; 229 struct nf_conn *ct;
230 __be32 flow_label;
231 int bound_dev_if;
235 232
233 lock_sock(sk);
236 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 234 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
237 tuple.src.u.tcp.port = inet->inet_sport; 235 tuple.src.u.tcp.port = inet->inet_sport;
238 tuple.dst.u3.in6 = sk->sk_v6_daddr; 236 tuple.dst.u3.in6 = sk->sk_v6_daddr;
239 tuple.dst.u.tcp.port = inet->inet_dport; 237 tuple.dst.u.tcp.port = inet->inet_dport;
240 tuple.dst.protonum = sk->sk_protocol; 238 tuple.dst.protonum = sk->sk_protocol;
239 bound_dev_if = sk->sk_bound_dev_if;
240 flow_label = inet6->flow_label;
241 release_sock(sk);
241 242
242 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) 243 if (tuple.dst.protonum != IPPROTO_TCP &&
244 tuple.dst.protonum != IPPROTO_SCTP)
243 return -ENOPROTOOPT; 245 return -ENOPROTOOPT;
244 246
245 if (*len < 0 || (unsigned int) *len < sizeof(sin6)) 247 if (*len < 0 || (unsigned int) *len < sizeof(sin6))
@@ -257,14 +259,13 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
257 259
258 sin6.sin6_family = AF_INET6; 260 sin6.sin6_family = AF_INET6;
259 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 261 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
260 sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; 262 sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
261 memcpy(&sin6.sin6_addr, 263 memcpy(&sin6.sin6_addr,
262 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 264 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
263 sizeof(sin6.sin6_addr)); 265 sizeof(sin6.sin6_addr));
264 266
265 nf_ct_put(ct); 267 nf_ct_put(ct);
266 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, 268 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
267 sk->sk_bound_dev_if);
268 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; 269 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
269} 270}
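The rework above takes lock_sock() only long enough to copy the socket fields it needs, then performs the slow conntrack lookup on stable local copies. Generic shape of that snapshot-under-lock pattern (pthreads; sock_like and the field names are illustrative):

#include <pthread.h>
#include <stdio.h>

struct sock_like {
	pthread_mutex_t lock;
	int bound_dev_if;
	unsigned int flow_label;
};

static void query(struct sock_like *sk)
{
	int bound_dev_if;
	unsigned int flow_label;

	pthread_mutex_lock(&sk->lock);     /* lock_sock() */
	bound_dev_if = sk->bound_dev_if;   /* copy everything needed */
	flow_label = sk->flow_label;
	pthread_mutex_unlock(&sk->lock);   /* release_sock() */

	/* the slow work afterwards uses only the stable copies */
	printf("dev %d label %#x\n", bound_dev_if, flow_label);
}

int main(void)
{
	struct sock_like sk = { PTHREAD_MUTEX_INITIALIZER, 3, 0xbeef };

	query(&sk);
	return 0;
}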
270 271
@@ -368,7 +369,7 @@ static struct nf_sockopt_ops so_getorigdst6 = {
368 .owner = THIS_MODULE, 369 .owner = THIS_MODULE,
369}; 370};
370 371
371static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
372static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
372 &nf_conntrack_l4proto_tcp6, 373 &nf_conntrack_l4proto_tcp6,
373 &nf_conntrack_l4proto_udp6, 374 &nf_conntrack_l4proto_udp6,
374 &nf_conntrack_l4proto_icmpv6, 375 &nf_conntrack_l4proto_icmpv6,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3ac0d826afc4..2548e2c8aedd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -27,7 +27,7 @@
27#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> 27#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
28#include <net/netfilter/nf_log.h> 28#include <net/netfilter/nf_log.h>
29 29
30static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
30static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
31 31
32static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) 32static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
33{ 33{
@@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
352 return &net->ct.nf_ct_proto.icmpv6.pn; 352 return &net->ct.nf_ct_proto.icmpv6.pn;
353} 353}
354 354
355struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
355const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
356{ 356{
357 .l3proto = PF_INET6, 357 .l3proto = PF_INET6,
358 .l4proto = IPPROTO_ICMPV6, 358 .l4proto = IPPROTO_ICMPV6,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 977d8900cfd1..b84ce3e6d728 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
231 231
232 if ((unsigned int)end > IPV6_MAXPLEN) { 232 if ((unsigned int)end > IPV6_MAXPLEN) {
233 pr_debug("offset is too large.\n"); 233 pr_debug("offset is too large.\n");
234	 return -1;
234	 return -EINVAL;
235 } 235 }
236 236
237 ecn = ip6_frag_ecn(ipv6_hdr(skb)); 237 ecn = ip6_frag_ecn(ipv6_hdr(skb));
@@ -264,7 +264,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
264 * this case. -DaveM 264 * this case. -DaveM
265 */ 265 */
266 pr_debug("end of fragment not rounded to 8 bytes.\n"); 266 pr_debug("end of fragment not rounded to 8 bytes.\n");
267	 return -1;
267	 inet_frag_kill(&fq->q, &nf_frags);
268 return -EPROTO;
268 } 269 }
269 if (end > fq->q.len) { 270 if (end > fq->q.len) {
270 /* Some bits beyond end -> corruption. */ 271 /* Some bits beyond end -> corruption. */
@@ -358,7 +359,7 @@ found:
358discard_fq: 359discard_fq:
359 inet_frag_kill(&fq->q, &nf_frags); 360 inet_frag_kill(&fq->q, &nf_frags);
360err: 361err:
361	return -1;
362	return -EINVAL;
362} 363}
363 364
364/* 365/*
@@ -567,6 +568,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
567 568
568int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) 569int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
569{ 570{
571 u16 savethdr = skb->transport_header;
570 struct net_device *dev = skb->dev; 572 struct net_device *dev = skb->dev;
571 int fhoff, nhoff, ret; 573 int fhoff, nhoff, ret;
572 struct frag_hdr *fhdr; 574 struct frag_hdr *fhdr;
@@ -600,8 +602,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
600 602
601 spin_lock_bh(&fq->q.lock); 603 spin_lock_bh(&fq->q.lock);
602 604
603	if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
604	 ret = -EINVAL;
605	ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
606	if (ret < 0) {
607 if (ret == -EPROTO) {
608 skb->transport_header = savethdr;
609 ret = 0;
610 }
605 goto out_unlock; 611 goto out_unlock;
606 } 612 }
607 613
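With distinct errnos, the caller can now tell a malformed fragment (-EINVAL, drop) from a fragment whose length is not 8-byte aligned (-EPROTO, restore the transport header and let it pass untracked). A sketch of that dispatch (queue_frag() is a stand-in for nf_ct_frag6_queue()):

#include <errno.h>
#include <stdio.h>

static int queue_frag(int aligned) { return aligned ? 0 : -EPROTO; }

int main(void)
{
	int ret = queue_frag(0);

	if (ret < 0) {
		if (ret == -EPROTO) {
			/* restore saved state, accept the skb untracked */
			puts("pass through");
			ret = 0;
		} else {
			puts("drop");
		}
	}
	return ret;
}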
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index b326da59257f..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv,
63 /* Previously seen (loopback)? */ 63 /* Previously seen (loopback)? */
64 if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) 64 if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
65 return NF_ACCEPT; 65 return NF_ACCEPT;
66
67 if (skb->_nfct == IP_CT_UNTRACKED)
68 return NF_ACCEPT;
66#endif 69#endif
67 70
68 err = nf_ct_frag6_gather(state->net, skb, 71 err = nf_ct_frag6_gather(state->net, skb,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..d346705d6ee6
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,278 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/module.h>
4#include <linux/netfilter.h>
5#include <linux/rhashtable.h>
6#include <linux/ipv6.h>
7#include <linux/netdevice.h>
8#include <net/ipv6.h>
9#include <net/ip6_route.h>
10#include <net/neighbour.h>
11#include <net/netfilter/nf_flow_table.h>
12#include <net/netfilter/nf_tables.h>
13/* For layer 4 checksum field offset. */
14#include <linux/tcp.h>
15#include <linux/udp.h>
16
17static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
18 struct in6_addr *addr,
19 struct in6_addr *new_addr)
20{
21 struct tcphdr *tcph;
22
23 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
24 skb_try_make_writable(skb, thoff + sizeof(*tcph)))
25 return -1;
26
27 tcph = (void *)(skb_network_header(skb) + thoff);
28 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
29 new_addr->s6_addr32, true);
30
31 return 0;
32}
33
34static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
35 struct in6_addr *addr,
36 struct in6_addr *new_addr)
37{
38 struct udphdr *udph;
39
40 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
41 skb_try_make_writable(skb, thoff + sizeof(*udph)))
42 return -1;
43
44 udph = (void *)(skb_network_header(skb) + thoff);
45 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
46 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
47 new_addr->s6_addr32, true);
48 if (!udph->check)
49 udph->check = CSUM_MANGLED_0;
50 }
51
52 return 0;
53}
54
55static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
56 unsigned int thoff, struct in6_addr *addr,
57 struct in6_addr *new_addr)
58{
59 switch (ip6h->nexthdr) {
60 case IPPROTO_TCP:
61 if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
62 return NF_DROP;
63 break;
64 case IPPROTO_UDP:
65 if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
66 return NF_DROP;
67 break;
68 }
69
70 return 0;
71}
72
73static int nf_flow_snat_ipv6(const struct flow_offload *flow,
74 struct sk_buff *skb, struct ipv6hdr *ip6h,
75 unsigned int thoff,
76 enum flow_offload_tuple_dir dir)
77{
78 struct in6_addr addr, new_addr;
79
80 switch (dir) {
81 case FLOW_OFFLOAD_DIR_ORIGINAL:
82 addr = ip6h->saddr;
83 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
84 ip6h->saddr = new_addr;
85 break;
86 case FLOW_OFFLOAD_DIR_REPLY:
87 addr = ip6h->daddr;
88 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
89 ip6h->daddr = new_addr;
90 break;
91 default:
92 return -1;
93 }
94
95 return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
96}
97
98static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
99 struct sk_buff *skb, struct ipv6hdr *ip6h,
100 unsigned int thoff,
101 enum flow_offload_tuple_dir dir)
102{
103 struct in6_addr addr, new_addr;
104
105 switch (dir) {
106 case FLOW_OFFLOAD_DIR_ORIGINAL:
107 addr = ip6h->daddr;
108 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
109 ip6h->daddr = new_addr;
110 break;
111 case FLOW_OFFLOAD_DIR_REPLY:
112 addr = ip6h->saddr;
113 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
114 ip6h->saddr = new_addr;
115 break;
116 default:
117 return -1;
118 }
119
120 return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
121}
122
123static int nf_flow_nat_ipv6(const struct flow_offload *flow,
124 struct sk_buff *skb,
125 enum flow_offload_tuple_dir dir)
126{
127 struct ipv6hdr *ip6h = ipv6_hdr(skb);
128 unsigned int thoff = sizeof(*ip6h);
129
130 if (flow->flags & FLOW_OFFLOAD_SNAT &&
131 (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
132 nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
133 return -1;
134 if (flow->flags & FLOW_OFFLOAD_DNAT &&
135 (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
136 nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
137 return -1;
138
139 return 0;
140}
141
142static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
143 struct flow_offload_tuple *tuple)
144{
145 struct flow_ports *ports;
146 struct ipv6hdr *ip6h;
147 unsigned int thoff;
148
149 if (!pskb_may_pull(skb, sizeof(*ip6h)))
150 return -1;
151
152 ip6h = ipv6_hdr(skb);
153
154 if (ip6h->nexthdr != IPPROTO_TCP &&
155 ip6h->nexthdr != IPPROTO_UDP)
156 return -1;
157
158 thoff = sizeof(*ip6h);
159 if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
160 return -1;
161
162 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
163
164 tuple->src_v6 = ip6h->saddr;
165 tuple->dst_v6 = ip6h->daddr;
166 tuple->src_port = ports->source;
167 tuple->dst_port = ports->dest;
168 tuple->l3proto = AF_INET6;
169 tuple->l4proto = ip6h->nexthdr;
170 tuple->iifidx = dev->ifindex;
171
172 return 0;
173}
174
175/* Based on ip_exceeds_mtu(). */
176static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
177{
178 if (skb->len <= mtu)
179 return false;
180
181 if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
182 return false;
183
184 return true;
185}
186
187static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
188{
189 u32 mtu;
190
191 mtu = ip6_dst_mtu_forward(&rt->dst);
192 if (__nf_flow_exceeds_mtu(skb, mtu))
193 return true;
194
195 return false;
196}
197
198unsigned int
199nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
200 const struct nf_hook_state *state)
201{
202 struct flow_offload_tuple_rhash *tuplehash;
203 struct nf_flowtable *flow_table = priv;
204 struct flow_offload_tuple tuple = {};
205 enum flow_offload_tuple_dir dir;
206 struct flow_offload *flow;
207 struct net_device *outdev;
208 struct in6_addr *nexthop;
209 struct ipv6hdr *ip6h;
210 struct rt6_info *rt;
211
212 if (skb->protocol != htons(ETH_P_IPV6))
213 return NF_ACCEPT;
214
215 if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
216 return NF_ACCEPT;
217
218 tuplehash = flow_offload_lookup(flow_table, &tuple);
219 if (tuplehash == NULL)
220 return NF_ACCEPT;
221
222 outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
223 if (!outdev)
224 return NF_ACCEPT;
225
226 dir = tuplehash->tuple.dir;
227 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
228
229 rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
230 if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
231 return NF_ACCEPT;
232
233 if (skb_try_make_writable(skb, sizeof(*ip6h)))
234 return NF_DROP;
235
236 if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
237 nf_flow_nat_ipv6(flow, skb, dir) < 0)
238 return NF_DROP;
239
240 flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
241 ip6h = ipv6_hdr(skb);
242 ip6h->hop_limit--;
243
244 skb->dev = outdev;
245 nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
246 neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
247
248 return NF_STOLEN;
249}
250EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
251
252static struct nf_flowtable_type flowtable_ipv6 = {
253 .family = NFPROTO_IPV6,
254 .params = &nf_flow_offload_rhash_params,
255 .gc = nf_flow_offload_work_gc,
256 .free = nf_flow_table_free,
257 .hook = nf_flow_offload_ipv6_hook,
258 .owner = THIS_MODULE,
259};
260
261static int __init nf_flow_ipv6_module_init(void)
262{
263 nft_register_flowtable_type(&flowtable_ipv6);
264
265 return 0;
266}
267
268static void __exit nf_flow_ipv6_module_exit(void)
269{
270 nft_unregister_flowtable_type(&flowtable_ipv6);
271}
272
273module_init(nf_flow_ipv6_module_init);
274module_exit(nf_flow_ipv6_module_exit);
275
276MODULE_LICENSE("GPL");
277MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
278MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
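One detail of the new flowtable file worth spelling out is the CSUM_MANGLED_0 handling in nf_flow_nat_ipv6_udp(): in UDP a transmitted checksum of 0 means "no checksum", so a checksum that computes to 0 must go on the wire as 0xffff, which is equal to 0 in one's-complement arithmetic. A standalone userspace illustration of that rule (not kernel code):

#include <stdint.h>
#include <stdio.h>

#define CSUM_MANGLED_0 0xffffU

static uint16_t udp_finalize_csum(uint32_t sum)
{
	/* fold the 32-bit accumulator into 16 bits */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	uint16_t csum = ~sum & 0xffff;

	return csum ? csum : CSUM_MANGLED_0;	/* never send 0 */
}

int main(void)
{
	printf("0x%04x\n", udp_finalize_csum(0xffff));	/* folds to 0 -> 0xffff */
	printf("0x%04x\n", udp_finalize_csum(0x1234));
	return 0;
}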
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 1d2fb9267d6f..bed57ee65f7b 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
369#endif 369#endif
370 unsigned int ret; 370 unsigned int ret;
371 371
372 /* root is playing with raw sockets. */
373 if (skb->len < sizeof(struct ipv6hdr))
374 return NF_ACCEPT;
375
376 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); 372 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
377#ifdef CONFIG_XFRM 373#ifdef CONFIG_XFRM
378 if (ret != NF_DROP && ret != NF_STOLEN && 374 if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
408 unsigned int ret; 404 unsigned int ret;
409 int err; 405 int err;
410 406
411 /* root is playing with raw sockets. */
412 if (skb->len < sizeof(struct ipv6hdr))
413 return NF_ACCEPT;
414
415 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); 407 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
416 if (ret != NF_DROP && ret != NF_STOLEN && 408 if (ret != NF_DROP && ret != NF_STOLEN &&
417 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 409 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index d6e4ba5de916..17e03589331c 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,68 +22,12 @@ static unsigned int nft_do_chain_ipv6(void *priv,
22{ 22{
23 struct nft_pktinfo pkt; 23 struct nft_pktinfo pkt;
24 24
25 nft_set_pktinfo_ipv6(&pkt, skb, state); 25 nft_set_pktinfo(&pkt, skb, state);
26 nft_set_pktinfo_ipv6(&pkt, skb);
26 27
27 return nft_do_chain(&pkt, priv); 28 return nft_do_chain(&pkt, priv);
28} 29}
29 30
30static unsigned int nft_ipv6_output(void *priv,
31 struct sk_buff *skb,
32 const struct nf_hook_state *state)
33{
34 if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
35 if (net_ratelimit())
36 pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
37 "packet\n");
38 return NF_ACCEPT;
39 }
40
41 return nft_do_chain_ipv6(priv, skb, state);
42}
43
44struct nft_af_info nft_af_ipv6 __read_mostly = {
45 .family = NFPROTO_IPV6,
46 .nhooks = NF_INET_NUMHOOKS,
47 .owner = THIS_MODULE,
48 .nops = 1,
49 .hooks = {
50 [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
51 [NF_INET_LOCAL_OUT] = nft_ipv6_output,
52 [NF_INET_FORWARD] = nft_do_chain_ipv6,
53 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
54 [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
55 },
56};
57EXPORT_SYMBOL_GPL(nft_af_ipv6);
58
59static int nf_tables_ipv6_init_net(struct net *net)
60{
61 net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
62 if (net->nft.ipv6 == NULL)
63 return -ENOMEM;
64
65 memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
66
67 if (nft_register_afinfo(net, net->nft.ipv6) < 0)
68 goto err;
69
70 return 0;
71err:
72 kfree(net->nft.ipv6);
73 return -ENOMEM;
74}
75
76static void nf_tables_ipv6_exit_net(struct net *net)
77{
78 nft_unregister_afinfo(net, net->nft.ipv6);
79 kfree(net->nft.ipv6);
80}
81
82static struct pernet_operations nf_tables_ipv6_net_ops = {
83 .init = nf_tables_ipv6_init_net,
84 .exit = nf_tables_ipv6_exit_net,
85};
86
87static const struct nf_chain_type filter_ipv6 = { 31static const struct nf_chain_type filter_ipv6 = {
88 .name = "filter", 32 .name = "filter",
89 .type = NFT_CHAIN_T_DEFAULT, 33 .type = NFT_CHAIN_T_DEFAULT,
@@ -94,26 +38,22 @@ static const struct nf_chain_type filter_ipv6 = {
94 (1 << NF_INET_FORWARD) | 38 (1 << NF_INET_FORWARD) |
95 (1 << NF_INET_PRE_ROUTING) | 39 (1 << NF_INET_PRE_ROUTING) |
96 (1 << NF_INET_POST_ROUTING), 40 (1 << NF_INET_POST_ROUTING),
41 .hooks = {
42 [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
43 [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
44 [NF_INET_FORWARD] = nft_do_chain_ipv6,
45 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
46 [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
47 },
97}; 48};
98 49
99static int __init nf_tables_ipv6_init(void) 50static int __init nf_tables_ipv6_init(void)
100{ 51{
101 int ret; 52 return nft_register_chain_type(&filter_ipv6);
102
103 ret = nft_register_chain_type(&filter_ipv6);
104 if (ret < 0)
105 return ret;
106
107 ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
108 if (ret < 0)
109 nft_unregister_chain_type(&filter_ipv6);
110
111 return ret;
112} 53}
113 54
114static void __exit nf_tables_ipv6_exit(void) 55static void __exit nf_tables_ipv6_exit(void)
115{ 56{
116 unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
117 nft_unregister_chain_type(&filter_ipv6); 57 nft_unregister_chain_type(&filter_ipv6);
118} 58}
119 59
@@ -122,4 +62,4 @@ module_exit(nf_tables_ipv6_exit);
122 62
123MODULE_LICENSE("GPL"); 63MODULE_LICENSE("GPL");
124MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 64MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
125MODULE_ALIAS_NFT_FAMILY(AF_INET6); 65MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
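This file, and the nft_chain_nat_ipv6.c and nft_chain_route_ipv6.c hunks below, all swap the single nft_set_pktinfo_ipv6(pkt, skb, state) call for a two-step initialization: a generic step that records skb and hook state, then a family step that parses the IPv6 header. A reduced model of that split, with toy types standing in for the nft structures:

#include <stdio.h>

struct pktinfo {
	const char *skb;
	const char *state;
	int thoff;	/* filled in by the family-specific step */
};

static void set_pktinfo(struct pktinfo *pkt, const char *skb,
			const char *state)
{
	pkt->skb = skb;
	pkt->state = state;
	pkt->thoff = -1;	/* unknown until the L3 parser runs */
}

static void set_pktinfo_ipv6(struct pktinfo *pkt)
{
	pkt->thoff = 40;	/* fixed IPv6 header length in this toy model */
}

int main(void)
{
	struct pktinfo pkt;

	set_pktinfo(&pkt, "skb", "state");
	set_pktinfo_ipv6(&pkt);
	printf("thoff=%d\n", pkt.thoff);
	return 0;
}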
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 443cd306c0b0..73fe2bd13fcf 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv,
31{ 31{
32 struct nft_pktinfo pkt; 32 struct nft_pktinfo pkt;
33 33
34 nft_set_pktinfo_ipv6(&pkt, skb, state); 34 nft_set_pktinfo(&pkt, skb, state);
35 nft_set_pktinfo_ipv6(&pkt, skb);
35 36
36 return nft_do_chain(&pkt, priv); 37 return nft_do_chain(&pkt, priv);
37} 38}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index f2727475895e..11d3c3b9aa18 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
33 u32 mark, flowlabel; 33 u32 mark, flowlabel;
34 int err; 34 int err;
35 35
36 nft_set_pktinfo_ipv6(&pkt, skb, state); 36 nft_set_pktinfo(&pkt, skb, state);
37 nft_set_pktinfo_ipv6(&pkt, skb);
37 38
38 /* save source/dest address, mark, hoplimit, flowlabel, priority */ 39 /* save source/dest address, mark, hoplimit, flowlabel, priority */
39 memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); 40 memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 54b5899543ef..cc5174c7254c 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
60{ 60{
61 const struct net_device *dev = NULL; 61 const struct net_device *dev = NULL;
62 const struct nf_ipv6_ops *v6ops; 62 const struct nf_ipv6_ops *v6ops;
63 const struct nf_afinfo *afinfo;
64 int route_err, addrtype; 63 int route_err, addrtype;
65 struct rt6_info *rt; 64 struct rt6_info *rt;
66 struct flowi6 fl6 = { 65 struct flowi6 fl6 = {
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
69 }; 68 };
70 u32 ret = 0; 69 u32 ret = 0;
71 70
72 afinfo = nf_get_afinfo(NFPROTO_IPV6); 71 v6ops = nf_get_ipv6_ops();
73 if (!afinfo) 72 if (!v6ops)
74 return RTN_UNREACHABLE; 73 return RTN_UNREACHABLE;
75 74
76 if (priv->flags & NFTA_FIB_F_IIF) 75 if (priv->flags & NFTA_FIB_F_IIF)
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
80 79
81 nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); 80 nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
82 81
83 v6ops = nf_get_ipv6_ops(); 82 if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
84 if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
85 ret = RTN_LOCAL; 83 ret = RTN_LOCAL;
86 84
87 route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt, 85 route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
88 flowi6_to_flowi(&fl6), false); 86 flowi6_to_flowi(&fl6), false);
89 if (route_err) 87 if (route_err)
90 goto err; 88 goto err;
91 89
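The hunk above replaces the nf_afinfo lookup with the nf_ipv6_ops table, so the route call goes through a family-specific ops structure resolved at run time and the caller need not link against IPv6 directly. A sketch of that indirection pattern with simplified names (an illustration, not the kernel's exact layout):

#include <stdio.h>

struct ipv6_stub_ops {
	int (*route)(const char *dst);	/* stand-in for v6ops->route */
};

static int toy_route(const char *dst)
{
	printf("route lookup for %s\n", dst);
	return 0;
}

static const struct ipv6_stub_ops ops_table = {
	.route = toy_route,
};

static const struct ipv6_stub_ops *get_ipv6_ops(void)
{
	return &ops_table;	/* the kernel returns NULL if IPv6 is absent */
}

int main(void)
{
	const struct ipv6_stub_ops *v6ops = get_ipv6_ops();

	if (!v6ops)
		return 1;	/* mirrors the RTN_UNREACHABLE fallback above */
	return v6ops->route("2001:db8::1");
}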
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..b67814242f78 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -58,7 +58,6 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file)
58} 58}
59 59
60static const struct file_operations sockstat6_seq_fops = { 60static const struct file_operations sockstat6_seq_fops = {
61 .owner = THIS_MODULE,
62 .open = sockstat6_seq_open, 61 .open = sockstat6_seq_open,
63 .read = seq_read, 62 .read = seq_read,
64 .llseek = seq_lseek, 63 .llseek = seq_lseek,
@@ -248,7 +247,6 @@ static int snmp6_seq_open(struct inode *inode, struct file *file)
248} 247}
249 248
250static const struct file_operations snmp6_seq_fops = { 249static const struct file_operations snmp6_seq_fops = {
251 .owner = THIS_MODULE,
252 .open = snmp6_seq_open, 250 .open = snmp6_seq_open,
253 .read = seq_read, 251 .read = seq_read,
254 .llseek = seq_lseek, 252 .llseek = seq_lseek,
@@ -274,7 +272,6 @@ static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
274} 272}
275 273
276static const struct file_operations snmp6_dev_seq_fops = { 274static const struct file_operations snmp6_dev_seq_fops = {
277 .owner = THIS_MODULE,
278 .open = snmp6_dev_seq_open, 275 .open = snmp6_dev_seq_open,
279 .read = seq_read, 276 .read = seq_read,
280 .llseek = seq_lseek, 277 .llseek = seq_lseek,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 761a473a07c5..4c25339b1984 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1272,6 +1272,8 @@ struct proto rawv6_prot = {
1272 .hash = raw_hash_sk, 1272 .hash = raw_hash_sk,
1273 .unhash = raw_unhash_sk, 1273 .unhash = raw_unhash_sk,
1274 .obj_size = sizeof(struct raw6_sock), 1274 .obj_size = sizeof(struct raw6_sock),
1275 .useroffset = offsetof(struct raw6_sock, filter),
1276 .usersize = sizeof_field(struct raw6_sock, filter),
1275 .h.raw_hash = &raw_v6_hashinfo, 1277 .h.raw_hash = &raw_v6_hashinfo,
1276#ifdef CONFIG_COMPAT 1278#ifdef CONFIG_COMPAT
1277 .compat_setsockopt = compat_rawv6_setsockopt, 1279 .compat_setsockopt = compat_rawv6_setsockopt,
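The useroffset/usersize pair added above describes a hardened-usercopy whitelist: the byte window inside the slab-allocated socket object that copy_to_user()/copy_from_user() are allowed to touch, here just the ICMPv6 filter. A userspace sketch of how such a window is computed, using a toy struct rather than the real raw6_sock:

#include <stddef.h>
#include <stdio.h>

struct toy_raw6_sock {
	long internal_state[4];		/* never exposed to userspace */
	unsigned char filter[32];	/* set via setsockopt, so whitelisted */
	long more_state[2];
};

int main(void)
{
	size_t useroffset = offsetof(struct toy_raw6_sock, filter);
	size_t usersize = sizeof(((struct toy_raw6_sock *)0)->filter);

	printf("whitelisted window: offset %zu, size %zu\n",
	       useroffset, usersize);
	return 0;
}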
@@ -1308,7 +1310,6 @@ static int raw6_seq_open(struct inode *inode, struct file *file)
1308} 1310}
1309 1311
1310static const struct file_operations raw6_seq_fops = { 1312static const struct file_operations raw6_seq_fops = {
1311 .owner = THIS_MODULE,
1312 .open = raw6_seq_open, 1313 .open = raw6_seq_open,
1313 .read = seq_read, 1314 .read = seq_read,
1314 .llseek = seq_lseek, 1315 .llseek = seq_lseek,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0458b761f3c5..9dcfadddd800 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
186 186
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) 187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{ 188{
189 return dst_metrics_write_ptr(rt->dst.from); 189 return dst_metrics_write_ptr(&rt->from->dst);
190} 190}
191 191
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
391{ 391{
392 struct rt6_info *rt = (struct rt6_info *)dst; 392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct rt6_exception_bucket *bucket; 393 struct rt6_exception_bucket *bucket;
394 struct dst_entry *from = dst->from; 394 struct rt6_info *from = rt->from;
395 struct inet6_dev *idev; 395 struct inet6_dev *idev;
396 396
397 dst_destroy_metrics_generic(dst); 397 dst_destroy_metrics_generic(dst);
@@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
409 kfree(bucket); 409 kfree(bucket);
410 } 410 }
411 411
412 dst->from = NULL; 412 rt->from = NULL;
413 dst_release(from); 413 dst_release(&from->dst);
414} 414}
415 415
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt)
443 if (rt->rt6i_flags & RTF_EXPIRES) { 443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires)) 444 if (time_after(jiffies, rt->dst.expires))
445 return true; 445 return true;
446 } else if (rt->dst.from) { 446 } else if (rt->from) {
447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
448 rt6_check_expired((struct rt6_info *)rt->dst.from); 448 rt6_check_expired(rt->from);
449 } 449 }
450 return false; 450 return false;
451} 451}
@@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
455 int strict) 455 int strict)
456{ 456{
457 struct rt6_info *sibling, *next_sibling; 457 struct rt6_info *sibling, *next_sibling;
458 int route_choosen;
459 458
460 /* We might have already computed the hash for ICMPv6 errors. In such 459 /* We might have already computed the hash for ICMPv6 errors. In such
461 * case it will always be non-zero. Otherwise now is the time to do it. 460 * case it will always be non-zero. Otherwise now is the time to do it.
@@ -463,26 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
463 if (!fl6->mp_hash) 462 if (!fl6->mp_hash)
464 fl6->mp_hash = rt6_multipath_hash(fl6, NULL); 463 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
465 464
466 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1); 465 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
467 /* Don't change the route, if route_choosen == 0 466 return match;
468 * (siblings does not include ourself) 467
469 */ 468 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
470 if (route_choosen) 469 rt6i_siblings) {
471 list_for_each_entry_safe(sibling, next_sibling, 470 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
472 &match->rt6i_siblings, rt6i_siblings) { 471 continue;
473 route_choosen--; 472 if (rt6_score_route(sibling, oif, strict) < 0)
474 if (route_choosen == 0) { 473 break;
475 struct inet6_dev *idev = sibling->rt6i_idev; 474 match = sibling;
476 475 break;
477 if (!netif_carrier_ok(sibling->dst.dev) && 476 }
478 idev->cnf.ignore_routes_with_linkdown) 477
479 break;
480 if (rt6_score_route(sibling, oif, strict) < 0)
481 break;
482 match = sibling;
483 break;
484 }
485 }
486 return match; 478 return match;
487} 479}
488 480
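The rewritten rt6_multipath_select() above implements hash-threshold nexthop selection (in the spirit of RFC 2992): walk the siblings in order and take the first usable route whose precomputed 31-bit upper bound is at least the flow hash. A simplified userspace sketch of the selection rule, with the sibling list flattened to an array and dead nexthops marked by a negative bound:

#include <stdio.h>
#include <stdint.h>

static int select_nexthop(uint32_t mp_hash, const int32_t *bound, int n)
{
	for (int i = 0; i < n; i++) {
		if (bound[i] < 0)	/* dead or linkdown nexthop */
			continue;
		if ((int32_t)mp_hash <= bound[i])
			return i;
	}
	return 0;	/* fall back to the first route */
}

int main(void)
{
	/* bounds as rebalancing would produce for weights 1:2:1 */
	const int32_t bound[] = { 536870911, 1610612735, 2147483647 };

	printf("hash 0x10000000 -> nexthop %d\n",
	       select_nexthop(0x10000000u, bound, 3));
	printf("hash 0x70000000 -> nexthop %d\n",
	       select_nexthop(0x70000000u, bound, 3));
	return 0;
}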
@@ -499,12 +491,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
499 struct rt6_info *local = NULL; 491 struct rt6_info *local = NULL;
500 struct rt6_info *sprt; 492 struct rt6_info *sprt;
501 493
502 if (!oif && ipv6_addr_any(saddr)) 494 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
503 goto out; 495 return rt;
504 496
505 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) { 497 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
506 struct net_device *dev = sprt->dst.dev; 498 struct net_device *dev = sprt->dst.dev;
507 499
500 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
501 continue;
502
508 if (oif) { 503 if (oif) {
509 if (dev->ifindex == oif) 504 if (dev->ifindex == oif)
510 return sprt; 505 return sprt;
@@ -533,8 +528,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
533 if (flags & RT6_LOOKUP_F_IFACE) 528 if (flags & RT6_LOOKUP_F_IFACE)
534 return net->ipv6.ip6_null_entry; 529 return net->ipv6.ip6_null_entry;
535 } 530 }
536out: 531
537 return rt; 532 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
538} 533}
539 534
540#ifdef CONFIG_IPV6_ROUTER_PREF 535#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -679,10 +674,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
679 int m; 674 int m;
680 bool match_do_rr = false; 675 bool match_do_rr = false;
681 struct inet6_dev *idev = rt->rt6i_idev; 676 struct inet6_dev *idev = rt->rt6i_idev;
682 struct net_device *dev = rt->dst.dev;
683 677
684 if (dev && !netif_carrier_ok(dev) && 678 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
685 idev->cnf.ignore_routes_with_linkdown && 679 goto out;
680
681 if (idev->cnf.ignore_routes_with_linkdown &&
682 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
686 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) 683 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
687 goto out; 684 goto out;
688 685
@@ -721,7 +718,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
721 718
722 match = NULL; 719 match = NULL;
723 cont = NULL; 720 cont = NULL;
724 for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) { 721 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
725 if (rt->rt6i_metric != metric) { 722 if (rt->rt6i_metric != metric) {
726 cont = rt; 723 cont = rt;
727 break; 724 break;
@@ -731,7 +728,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
731 } 728 }
732 729
733 for (rt = leaf; rt && rt != rr_head; 730 for (rt = leaf; rt && rt != rr_head;
734 rt = rcu_dereference(rt->dst.rt6_next)) { 731 rt = rcu_dereference(rt->rt6_next)) {
735 if (rt->rt6i_metric != metric) { 732 if (rt->rt6i_metric != metric) {
736 cont = rt; 733 cont = rt;
737 break; 734 break;
@@ -743,7 +740,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
743 if (match || !cont) 740 if (match || !cont)
744 return match; 741 return match;
745 742
746 for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next)) 743 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
747 match = find_match(rt, oif, strict, &mpri, match, do_rr); 744 match = find_match(rt, oif, strict, &mpri, match, do_rr);
748 745
749 return match; 746 return match;
@@ -781,7 +778,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
781 &do_rr); 778 &do_rr);
782 779
783 if (do_rr) { 780 if (do_rr) {
784 struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next); 781 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
785 782
786 /* no entries matched; do round-robin */ 783 /* no entries matched; do round-robin */
787 if (!next || next->rt6i_metric != rt0->rt6i_metric) 784 if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -1054,7 +1051,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1054 */ 1051 */
1055 1052
1056 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) 1053 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1057 ort = (struct rt6_info *)ort->dst.from; 1054 ort = ort->from;
1058 1055
1059 rcu_read_lock(); 1056 rcu_read_lock();
1060 dev = ip6_rt_get_dev_rcu(ort); 1057 dev = ip6_rt_get_dev_rcu(ort);
@@ -1274,7 +1271,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
1274 1271
1275 /* ort can't be a cache or pcpu route */ 1272 /* ort can't be a cache or pcpu route */
1276 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) 1273 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1277 ort = (struct rt6_info *)ort->dst.from; 1274 ort = ort->from;
1278 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); 1275 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1279 1276
1280 spin_lock_bh(&rt6_exception_lock); 1277 spin_lock_bh(&rt6_exception_lock);
@@ -1346,7 +1343,9 @@ out:
1346 1343
1347 /* Update fn->fn_sernum to invalidate all cached dst */ 1344 /* Update fn->fn_sernum to invalidate all cached dst */
1348 if (!err) { 1345 if (!err) {
1346 spin_lock_bh(&ort->rt6i_table->tb6_lock);
1349 fib6_update_sernum(ort); 1347 fib6_update_sernum(ort);
1348 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
1350 fib6_force_start_gc(net); 1349 fib6_force_start_gc(net);
1351 } 1350 }
1352 1351
@@ -1415,8 +1414,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1415/* Remove the passed in cached rt from the hash table that contains it */ 1414/* Remove the passed in cached rt from the hash table that contains it */
1416int rt6_remove_exception_rt(struct rt6_info *rt) 1415int rt6_remove_exception_rt(struct rt6_info *rt)
1417{ 1416{
1418 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1419 struct rt6_exception_bucket *bucket; 1417 struct rt6_exception_bucket *bucket;
1418 struct rt6_info *from = rt->from;
1420 struct in6_addr *src_key = NULL; 1419 struct in6_addr *src_key = NULL;
1421 struct rt6_exception *rt6_ex; 1420 struct rt6_exception *rt6_ex;
1422 int err; 1421 int err;
@@ -1460,8 +1459,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
1460 */ 1459 */
1461static void rt6_update_exception_stamp_rt(struct rt6_info *rt) 1460static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1462{ 1461{
1463 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1464 struct rt6_exception_bucket *bucket; 1462 struct rt6_exception_bucket *bucket;
1463 struct rt6_info *from = rt->from;
1465 struct in6_addr *src_key = NULL; 1464 struct in6_addr *src_key = NULL;
1466 struct rt6_exception *rt6_ex; 1465 struct rt6_exception *rt6_ex;
1467 1466
@@ -1586,12 +1585,19 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1586 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when 1585 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1587 * expired, independently from their aging, as per RFC 8201 section 4 1586 * expired, independently from their aging, as per RFC 8201 section 4
1588 */ 1587 */
1589 if (!(rt->rt6i_flags & RTF_EXPIRES) && 1588 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1590 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { 1589 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1591 RT6_TRACE("aging clone %p\n", rt); 1590 RT6_TRACE("aging clone %p\n", rt);
1591 rt6_remove_exception(bucket, rt6_ex);
1592 return;
1593 }
1594 } else if (time_after(jiffies, rt->dst.expires)) {
1595 RT6_TRACE("purging expired route %p\n", rt);
1592 rt6_remove_exception(bucket, rt6_ex); 1596 rt6_remove_exception(bucket, rt6_ex);
1593 return; 1597 return;
1594 } else if (rt->rt6i_flags & RTF_GATEWAY) { 1598 }
1599
1600 if (rt->rt6i_flags & RTF_GATEWAY) {
1595 struct neighbour *neigh; 1601 struct neighbour *neigh;
1596 __u8 neigh_flags = 0; 1602 __u8 neigh_flags = 0;
1597 1603
@@ -1606,11 +1612,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1606 rt6_remove_exception(bucket, rt6_ex); 1612 rt6_remove_exception(bucket, rt6_ex);
1607 return; 1613 return;
1608 } 1614 }
1609 } else if (__rt6_check_expired(rt)) {
1610 RT6_TRACE("purging expired route %p\n", rt);
1611 rt6_remove_exception(bucket, rt6_ex);
1612 return;
1613 } 1615 }
1616
1614 gc_args->more++; 1617 gc_args->more++;
1615} 1618}
1616 1619
@@ -1824,10 +1827,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1824 1827
1825 if (skb) { 1828 if (skb) {
1826 ip6_multipath_l3_keys(skb, &hash_keys); 1829 ip6_multipath_l3_keys(skb, &hash_keys);
1827 return flow_hash_from_keys(&hash_keys); 1830 return flow_hash_from_keys(&hash_keys) >> 1;
1828 } 1831 }
1829 1832
1830 return get_hash_from_flowi6(fl6); 1833 return get_hash_from_flowi6(fl6) >> 1;
1831} 1834}
1832 1835
1833void ip6_route_input(struct sk_buff *skb) 1836void ip6_route_input(struct sk_buff *skb)
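The new right-shift above appears to reduce the 32-bit flow hash to 31 bits so it can be compared against the per-nexthop upper bounds, which are stored in an atomic_t (a signed 32-bit value spanning [0, 2^31 - 1]); treat that rationale as an inference from the surrounding code. A one-liner demonstrating the range reduction:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t h = 0xffffffffu;

	printf("max 31-bit hash: %u (fits a signed int: %d)\n",
	       h >> 1, (int)(h >> 1));
	return 0;
}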
@@ -1929,9 +1932,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1929 1932
1930static void rt6_dst_from_metrics_check(struct rt6_info *rt) 1933static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1931{ 1934{
1932 if (rt->dst.from && 1935 if (rt->from &&
1933 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) 1936 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
1934 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); 1937 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
1935} 1938}
1936 1939
1937static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) 1940static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
@@ -1951,7 +1954,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1951{ 1954{
1952 if (!__rt6_check_expired(rt) && 1955 if (!__rt6_check_expired(rt) &&
1953 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 1956 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1954 rt6_check((struct rt6_info *)(rt->dst.from), cookie)) 1957 rt6_check(rt->from, cookie))
1955 return &rt->dst; 1958 return &rt->dst;
1956 else 1959 else
1957 return NULL; 1960 return NULL;
@@ -1971,7 +1974,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1971 rt6_dst_from_metrics_check(rt); 1974 rt6_dst_from_metrics_check(rt);
1972 1975
1973 if (rt->rt6i_flags & RTF_PCPU || 1976 if (rt->rt6i_flags & RTF_PCPU ||
1974 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) 1977 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
1975 return rt6_dst_from_check(rt, cookie); 1978 return rt6_dst_from_check(rt, cookie);
1976 else 1979 else
1977 return rt6_check(rt, cookie); 1980 return rt6_check(rt, cookie);
@@ -2154,6 +2157,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
2154 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2157 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2155restart: 2158restart:
2156 for_each_fib6_node_rt_rcu(fn) { 2159 for_each_fib6_node_rt_rcu(fn) {
2160 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2161 continue;
2157 if (rt6_check_expired(rt)) 2162 if (rt6_check_expired(rt))
2158 continue; 2163 continue;
2159 if (rt->dst.error) 2164 if (rt->dst.error)
@@ -2344,7 +2349,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2344 rt->rt6i_idev = idev; 2349 rt->rt6i_idev = idev;
2345 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 2350 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2346 2351
2347 /* Add this dst into uncached_list so that rt6_ifdown() can 2352 /* Add this dst into uncached_list so that rt6_disable_ip() can
2348 * do proper release of the net_device 2353 * do proper release of the net_device
2349 */ 2354 */
2350 rt6_uncached_list_add(rt); 2355 rt6_uncached_list_add(rt);
@@ -2439,7 +2444,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
2439 2444
2440static struct rt6_info *ip6_nh_lookup_table(struct net *net, 2445static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2441 struct fib6_config *cfg, 2446 struct fib6_config *cfg,
2442 const struct in6_addr *gw_addr) 2447 const struct in6_addr *gw_addr,
2448 u32 tbid, int flags)
2443{ 2449{
2444 struct flowi6 fl6 = { 2450 struct flowi6 fl6 = {
2445 .flowi6_oif = cfg->fc_ifindex, 2451 .flowi6_oif = cfg->fc_ifindex,
@@ -2448,15 +2454,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2448 }; 2454 };
2449 struct fib6_table *table; 2455 struct fib6_table *table;
2450 struct rt6_info *rt; 2456 struct rt6_info *rt;
2451 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
2452 2457
2453 table = fib6_get_table(net, cfg->fc_table); 2458 table = fib6_get_table(net, tbid);
2454 if (!table) 2459 if (!table)
2455 return NULL; 2460 return NULL;
2456 2461
2457 if (!ipv6_addr_any(&cfg->fc_prefsrc)) 2462 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2458 flags |= RT6_LOOKUP_F_HAS_SADDR; 2463 flags |= RT6_LOOKUP_F_HAS_SADDR;
2459 2464
2465 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2460 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); 2466 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2461 2467
2462 /* if table lookup failed, fall back to full lookup */ 2468 /* if table lookup failed, fall back to full lookup */
@@ -2468,6 +2474,84 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2468 return rt; 2474 return rt;
2469} 2475}
2470 2476
2477static int ip6_route_check_nh_onlink(struct net *net,
2478 struct fib6_config *cfg,
2479 struct net_device *dev,
2480 struct netlink_ext_ack *extack)
2481{
2482 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2483 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2484 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2485 struct rt6_info *grt;
2486 int err;
2487
2488 err = 0;
2489 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2490 if (grt) {
2491 if (!grt->dst.error &&
2492 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2493 NL_SET_ERR_MSG(extack,
2494 "Nexthop has invalid gateway or device mismatch");
2495 err = -EINVAL;
2496 }
2497
2498 ip6_rt_put(grt);
2499 }
2500
2501 return err;
2502}
2503
2504static int ip6_route_check_nh(struct net *net,
2505 struct fib6_config *cfg,
2506 struct net_device **_dev,
2507 struct inet6_dev **idev)
2508{
2509 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2510 struct net_device *dev = _dev ? *_dev : NULL;
2511 struct rt6_info *grt = NULL;
2512 int err = -EHOSTUNREACH;
2513
2514 if (cfg->fc_table) {
2515 int flags = RT6_LOOKUP_F_IFACE;
2516
2517 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2518 cfg->fc_table, flags);
2519 if (grt) {
2520 if (grt->rt6i_flags & RTF_GATEWAY ||
2521 (dev && dev != grt->dst.dev)) {
2522 ip6_rt_put(grt);
2523 grt = NULL;
2524 }
2525 }
2526 }
2527
2528 if (!grt)
2529 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
2530
2531 if (!grt)
2532 goto out;
2533
2534 if (dev) {
2535 if (dev != grt->dst.dev) {
2536 ip6_rt_put(grt);
2537 goto out;
2538 }
2539 } else {
2540 *_dev = dev = grt->dst.dev;
2541 *idev = grt->rt6i_idev;
2542 dev_hold(dev);
2543 in6_dev_hold(grt->rt6i_idev);
2544 }
2545
2546 if (!(grt->rt6i_flags & RTF_GATEWAY))
2547 err = 0;
2548
2549 ip6_rt_put(grt);
2550
2551out:
2552 return err;
2553}
2554
2471static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, 2555static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2472 struct netlink_ext_ack *extack) 2556 struct netlink_ext_ack *extack)
2473{ 2557{
@@ -2519,6 +2603,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2519 if (cfg->fc_metric == 0) 2603 if (cfg->fc_metric == 0)
2520 cfg->fc_metric = IP6_RT_PRIO_USER; 2604 cfg->fc_metric = IP6_RT_PRIO_USER;
2521 2605
2606 if (cfg->fc_flags & RTNH_F_ONLINK) {
2607 if (!dev) {
2608 NL_SET_ERR_MSG(extack,
2609 "Nexthop device required for onlink");
2610 err = -ENODEV;
2611 goto out;
2612 }
2613
2614 if (!(dev->flags & IFF_UP)) {
2615 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2616 err = -ENETDOWN;
2617 goto out;
2618 }
2619 }
2620
2522 err = -ENOBUFS; 2621 err = -ENOBUFS;
2523 if (cfg->fc_nlinfo.nlh && 2622 if (cfg->fc_nlinfo.nlh &&
2524 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 2623 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2593,6 +2692,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2593#endif 2692#endif
2594 2693
2595 rt->rt6i_metric = cfg->fc_metric; 2694 rt->rt6i_metric = cfg->fc_metric;
2695 rt->rt6i_nh_weight = 1;
2596 2696
2597 /* We cannot add true routes via loopback here, 2697 /* We cannot add true routes via loopback here,
2598 they would result in kernel looping; promote them to reject routes 2698 they would result in kernel looping; promote them to reject routes
@@ -2662,8 +2762,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2662 rt->rt6i_gateway = *gw_addr; 2762 rt->rt6i_gateway = *gw_addr;
2663 2763
2664 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 2764 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2665 struct rt6_info *grt = NULL;
2666
2667 /* IPv6 strictly inhibits using not link-local 2765 /* IPv6 strictly inhibits using not link-local
2668 addresses as nexthop address. 2766 addresses as nexthop address.
2669 Otherwise, router will not able to send redirects. 2767 Otherwise, router will not able to send redirects.
@@ -2680,40 +2778,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2680 goto out; 2778 goto out;
2681 } 2779 }
2682 2780
2683 if (cfg->fc_table) { 2781 if (cfg->fc_flags & RTNH_F_ONLINK) {
2684 grt = ip6_nh_lookup_table(net, cfg, gw_addr); 2782 err = ip6_route_check_nh_onlink(net, cfg, dev,
2685 2783 extack);
2686 if (grt) {
2687 if (grt->rt6i_flags & RTF_GATEWAY ||
2688 (dev && dev != grt->dst.dev)) {
2689 ip6_rt_put(grt);
2690 grt = NULL;
2691 }
2692 }
2693 }
2694
2695 if (!grt)
2696 grt = rt6_lookup(net, gw_addr, NULL,
2697 cfg->fc_ifindex, 1);
2698
2699 err = -EHOSTUNREACH;
2700 if (!grt)
2701 goto out;
2702 if (dev) {
2703 if (dev != grt->dst.dev) {
2704 ip6_rt_put(grt);
2705 goto out;
2706 }
2707 } else { 2784 } else {
2708 dev = grt->dst.dev; 2785 err = ip6_route_check_nh(net, cfg, &dev, &idev);
2709 idev = grt->rt6i_idev;
2710 dev_hold(dev);
2711 in6_dev_hold(grt->rt6i_idev);
2712 } 2786 }
2713 if (!(grt->rt6i_flags & RTF_GATEWAY))
2714 err = 0;
2715 ip6_rt_put(grt);
2716
2717 if (err) 2787 if (err)
2718 goto out; 2788 goto out;
2719 } 2789 }
@@ -2732,6 +2802,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2732 if (!dev) 2802 if (!dev)
2733 goto out; 2803 goto out;
2734 2804
2805 if (!(dev->flags & IFF_UP)) {
2806 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2807 err = -ENETDOWN;
2808 goto out;
2809 }
2810
2735 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 2811 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2736 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 2812 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2737 NL_SET_ERR_MSG(extack, "Invalid source address"); 2813 NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2746,6 +2822,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2746 rt->rt6i_flags = cfg->fc_flags; 2822 rt->rt6i_flags = cfg->fc_flags;
2747 2823
2748install_route: 2824install_route:
2825 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2826 !netif_carrier_ok(dev))
2827 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
2828 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
2749 rt->dst.dev = dev; 2829 rt->dst.dev = dev;
2750 rt->rt6i_idev = idev; 2830 rt->rt6i_idev = idev;
2751 rt->rt6i_table = table; 2831 rt->rt6i_table = table;
@@ -3056,11 +3136,11 @@ out:
3056 3136
3057static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) 3137static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3058{ 3138{
3059 BUG_ON(from->dst.from); 3139 BUG_ON(from->from);
3060 3140
3061 rt->rt6i_flags &= ~RTF_EXPIRES; 3141 rt->rt6i_flags &= ~RTF_EXPIRES;
3062 dst_hold(&from->dst); 3142 dst_hold(&from->dst);
3063 rt->dst.from = &from->dst; 3143 rt->from = from;
3064 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); 3144 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3065} 3145}
3066 3146
@@ -3459,37 +3539,249 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3459 fib6_clean_all(net, fib6_clean_tohost, gateway); 3539 fib6_clean_all(net, fib6_clean_tohost, gateway);
3460} 3540}
3461 3541
3462struct arg_dev_net { 3542struct arg_netdev_event {
3463 struct net_device *dev; 3543 const struct net_device *dev;
3464 struct net *net; 3544 union {
3545 unsigned int nh_flags;
3546 unsigned long event;
3547 };
3465}; 3548};
3466 3549
3550static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3551{
3552 struct rt6_info *iter;
3553 struct fib6_node *fn;
3554
3555 fn = rcu_dereference_protected(rt->rt6i_node,
3556 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3557 iter = rcu_dereference_protected(fn->leaf,
3558 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3559 while (iter) {
3560 if (iter->rt6i_metric == rt->rt6i_metric &&
3561 rt6_qualify_for_ecmp(iter))
3562 return iter;
3563 iter = rcu_dereference_protected(iter->rt6_next,
3564 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3565 }
3566
3567 return NULL;
3568}
3569
3570static bool rt6_is_dead(const struct rt6_info *rt)
3571{
3572 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3573 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3574 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3575 return true;
3576
3577 return false;
3578}
3579
3580static int rt6_multipath_total_weight(const struct rt6_info *rt)
3581{
3582 struct rt6_info *iter;
3583 int total = 0;
3584
3585 if (!rt6_is_dead(rt))
3586 total += rt->rt6i_nh_weight;
3587
3588 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3589 if (!rt6_is_dead(iter))
3590 total += iter->rt6i_nh_weight;
3591 }
3592
3593 return total;
3594}
3595
3596static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3597{
3598 int upper_bound = -1;
3599
3600 if (!rt6_is_dead(rt)) {
3601 *weight += rt->rt6i_nh_weight;
3602 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3603 total) - 1;
3604 }
3605 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3606}
3607
3608static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3609{
3610 struct rt6_info *iter;
3611 int weight = 0;
3612
3613 rt6_upper_bound_set(rt, &weight, total);
3614
3615 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3616 rt6_upper_bound_set(iter, &weight, total);
3617}
3618
3619void rt6_multipath_rebalance(struct rt6_info *rt)
3620{
3621 struct rt6_info *first;
3622 int total;
3623
3624 /* In case the entire multipath route was marked for flushing,
3625 * then there is no need to rebalance upon the removal of every
3626 * sibling route.
3627 */
3628 if (!rt->rt6i_nsiblings || rt->should_flush)
3629 return;
3630
3631 /* During lookup routes are evaluated in order, so we need to
3632 * make sure upper bounds are assigned from the first sibling
3633 * onwards.
3634 */
3635 first = rt6_multipath_first_sibling(rt);
3636 if (WARN_ON_ONCE(!first))
3637 return;
3638
3639 total = rt6_multipath_total_weight(first);
3640 rt6_multipath_upper_bound_set(first, total);
3641}
3642
3643static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3644{
3645 const struct arg_netdev_event *arg = p_arg;
3646 const struct net *net = dev_net(arg->dev);
3647
3648 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
3649 rt->rt6i_nh_flags &= ~arg->nh_flags;
3650 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
3651 rt6_multipath_rebalance(rt);
3652 }
3653
3654 return 0;
3655}
3656
3657void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3658{
3659 struct arg_netdev_event arg = {
3660 .dev = dev,
3661 {
3662 .nh_flags = nh_flags,
3663 },
3664 };
3665
3666 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3667 arg.nh_flags |= RTNH_F_LINKDOWN;
3668
3669 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3670}
3671
3672static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3673 const struct net_device *dev)
3674{
3675 struct rt6_info *iter;
3676
3677 if (rt->dst.dev == dev)
3678 return true;
3679 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3680 if (iter->dst.dev == dev)
3681 return true;
3682
3683 return false;
3684}
3685
3686static void rt6_multipath_flush(struct rt6_info *rt)
3687{
3688 struct rt6_info *iter;
3689
3690 rt->should_flush = 1;
3691 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3692 iter->should_flush = 1;
3693}
3694
3695static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3696 const struct net_device *down_dev)
3697{
3698 struct rt6_info *iter;
3699 unsigned int dead = 0;
3700
3701 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3702 dead++;
3703 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3704 if (iter->dst.dev == down_dev ||
3705 iter->rt6i_nh_flags & RTNH_F_DEAD)
3706 dead++;
3707
3708 return dead;
3709}
3710
3711static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3712 const struct net_device *dev,
3713 unsigned int nh_flags)
3714{
3715 struct rt6_info *iter;
3716
3717 if (rt->dst.dev == dev)
3718 rt->rt6i_nh_flags |= nh_flags;
3719 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3720 if (iter->dst.dev == dev)
3721 iter->rt6i_nh_flags |= nh_flags;
3722}
3723
3467/* called with write lock held for table with rt */ 3724/* called with write lock held for table with rt */
3468static int fib6_ifdown(struct rt6_info *rt, void *arg) 3725static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
3469{ 3726{
3470 const struct arg_dev_net *adn = arg; 3727 const struct arg_netdev_event *arg = p_arg;
3471 const struct net_device *dev = adn->dev; 3728 const struct net_device *dev = arg->dev;
3729 const struct net *net = dev_net(dev);
3472 3730
3473 if ((rt->dst.dev == dev || !dev) && 3731 if (rt == net->ipv6.ip6_null_entry)
3474 rt != adn->net->ipv6.ip6_null_entry && 3732 return 0;
3475 (rt->rt6i_nsiblings == 0 || 3733
3476 (dev && netdev_unregistering(dev)) || 3734 switch (arg->event) {
3477 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) 3735 case NETDEV_UNREGISTER:
3478 return -1; 3736 return rt->dst.dev == dev ? -1 : 0;
3737 case NETDEV_DOWN:
3738 if (rt->should_flush)
3739 return -1;
3740 if (!rt->rt6i_nsiblings)
3741 return rt->dst.dev == dev ? -1 : 0;
3742 if (rt6_multipath_uses_dev(rt, dev)) {
3743 unsigned int count;
3744
3745 count = rt6_multipath_dead_count(rt, dev);
3746 if (rt->rt6i_nsiblings + 1 == count) {
3747 rt6_multipath_flush(rt);
3748 return -1;
3749 }
3750 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3751 RTNH_F_LINKDOWN);
3752 fib6_update_sernum(rt);
3753 rt6_multipath_rebalance(rt);
3754 }
3755 return -2;
3756 case NETDEV_CHANGE:
3757 if (rt->dst.dev != dev ||
3758 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
3759 break;
3760 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
3761 rt6_multipath_rebalance(rt);
3762 break;
3763 }
3479 3764
3480 return 0; 3765 return 0;
3481} 3766}
3482 3767
3483void rt6_ifdown(struct net *net, struct net_device *dev) 3768void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
3484{ 3769{
3485 struct arg_dev_net adn = { 3770 struct arg_netdev_event arg = {
3486 .dev = dev, 3771 .dev = dev,
3487 .net = net, 3772 {
3773 .event = event,
3774 },
3488 }; 3775 };
3489 3776
3490 fib6_clean_all(net, fib6_ifdown, &adn); 3777 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3491 if (dev) 3778}
3492 rt6_uncached_list_flush_dev(net, dev); 3779
3780void rt6_disable_ip(struct net_device *dev, unsigned long event)
3781{
3782 rt6_sync_down_dev(dev, event);
3783 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3784 neigh_ifdown(&nd_tbl, dev);
3493} 3785}
3494 3786
3495struct rt6_mtu_change_arg { 3787struct rt6_mtu_change_arg {
@@ -3603,6 +3895,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
3603 if (rtm->rtm_flags & RTM_F_CLONED) 3895 if (rtm->rtm_flags & RTM_F_CLONED)
3604 cfg->fc_flags |= RTF_CACHE; 3896 cfg->fc_flags |= RTF_CACHE;
3605 3897
3898 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
3899
3606 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 3900 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
3607 cfg->fc_nlinfo.nlh = nlh; 3901 cfg->fc_nlinfo.nlh = nlh;
3608 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 3902 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -3812,6 +4106,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
3812 goto cleanup; 4106 goto cleanup;
3813 } 4107 }
3814 4108
4109 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4110
3815 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 4111 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3816 if (err) { 4112 if (err) {
3817 dst_release_immediate(&rt->dst); 4113 dst_release_immediate(&rt->dst);
@@ -3992,7 +4288,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
3992static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, 4288static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3993 unsigned int *flags, bool skip_oif) 4289 unsigned int *flags, bool skip_oif)
3994{ 4290{
3995 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { 4291 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4292 *flags |= RTNH_F_DEAD;
4293
4294 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
3996 *flags |= RTNH_F_LINKDOWN; 4295 *flags |= RTNH_F_LINKDOWN;
3997 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) 4296 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3998 *flags |= RTNH_F_DEAD; 4297 *flags |= RTNH_F_DEAD;
@@ -4003,6 +4302,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
4003 goto nla_put_failure; 4302 goto nla_put_failure;
4004 } 4303 }
4005 4304
4305 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
4006 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) 4306 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
4007 *flags |= RTNH_F_OFFLOAD; 4307 *flags |= RTNH_F_OFFLOAD;
4008 4308
@@ -4031,7 +4331,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4031 if (!rtnh) 4331 if (!rtnh)
4032 goto nla_put_failure; 4332 goto nla_put_failure;
4033 4333
4034 rtnh->rtnh_hops = 0; 4334 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
4035 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; 4335 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4036 4336
4037 if (rt6_nexthop_info(skb, rt, &flags, true) < 0) 4337 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
@@ -4321,9 +4621,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4321 goto errout; 4621 goto errout;
4322 } 4622 }
4323 4623
4324 if (fibmatch && rt->dst.from) { 4624 if (fibmatch && rt->from) {
4325 struct rt6_info *ort = container_of(rt->dst.from, 4625 struct rt6_info *ort = rt->from;
4326 struct rt6_info, dst);
4327 4626
4328 dst_hold(&ort->dst); 4627 dst_hold(&ort->dst);
4329 ip6_rt_put(rt); 4628 ip6_rt_put(rt);
@@ -4427,7 +4726,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
4427#ifdef CONFIG_PROC_FS 4726#ifdef CONFIG_PROC_FS
4428 4727
4429static const struct file_operations ipv6_route_proc_fops = { 4728static const struct file_operations ipv6_route_proc_fops = {
4430 .owner = THIS_MODULE,
4431 .open = ipv6_route_open, 4729 .open = ipv6_route_open,
4432 .read = seq_read, 4730 .read = seq_read,
4433 .llseek = seq_lseek, 4731 .llseek = seq_lseek,
@@ -4455,7 +4753,6 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4455} 4753}
4456 4754
4457static const struct file_operations rt6_stats_seq_fops = { 4755static const struct file_operations rt6_stats_seq_fops = {
4458 .owner = THIS_MODULE,
4459 .open = rt6_stats_seq_open, 4756 .open = rt6_stats_seq_open,
4460 .read = seq_read, 4757 .read = seq_read,
4461 .llseek = seq_lseek, 4758 .llseek = seq_lseek,
@@ -4600,8 +4897,6 @@ static int __net_init ip6_route_net_init(struct net *net)
4600 GFP_KERNEL); 4897 GFP_KERNEL);
4601 if (!net->ipv6.ip6_null_entry) 4898 if (!net->ipv6.ip6_null_entry)
4602 goto out_ip6_dst_entries; 4899 goto out_ip6_dst_entries;
4603 net->ipv6.ip6_null_entry->dst.path =
4604 (struct dst_entry *)net->ipv6.ip6_null_entry;
4605 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4900 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4606 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 4901 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4607 ip6_template_metrics, true); 4902 ip6_template_metrics, true);
@@ -4613,8 +4908,6 @@ static int __net_init ip6_route_net_init(struct net *net)
4613 GFP_KERNEL); 4908 GFP_KERNEL);
4614 if (!net->ipv6.ip6_prohibit_entry) 4909 if (!net->ipv6.ip6_prohibit_entry)
4615 goto out_ip6_null_entry; 4910 goto out_ip6_null_entry;
4616 net->ipv6.ip6_prohibit_entry->dst.path =
4617 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
4618 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4911 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4619 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 4912 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4620 ip6_template_metrics, true); 4913 ip6_template_metrics, true);
@@ -4624,8 +4917,6 @@ static int __net_init ip6_route_net_init(struct net *net)
4624 GFP_KERNEL); 4917 GFP_KERNEL);
4625 if (!net->ipv6.ip6_blk_hole_entry) 4918 if (!net->ipv6.ip6_blk_hole_entry)
4626 goto out_ip6_prohibit_entry; 4919 goto out_ip6_prohibit_entry;
4627 net->ipv6.ip6_blk_hole_entry->dst.path =
4628 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
4629 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4920 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4630 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 4921 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4631 ip6_template_metrics, true); 4922 ip6_template_metrics, true);
@@ -4782,11 +5073,20 @@ int __init ip6_route_init(void)
4782 if (ret) 5073 if (ret)
4783 goto fib6_rules_init; 5074 goto fib6_rules_init;
4784 5075
4785 ret = -ENOBUFS; 5076 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
4786 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || 5077 inet6_rtm_newroute, NULL, 0);
4787 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || 5078 if (ret < 0)
4788 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, 5079 goto out_register_late_subsys;
4789 RTNL_FLAG_DOIT_UNLOCKED)) 5080
5081 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5082 inet6_rtm_delroute, NULL, 0);
5083 if (ret < 0)
5084 goto out_register_late_subsys;
5085
5086 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5087 inet6_rtm_getroute, NULL,
5088 RTNL_FLAG_DOIT_UNLOCKED);
5089 if (ret < 0)
4790 goto out_register_late_subsys; 5090 goto out_register_late_subsys;
4791 5091
4792 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 5092 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
@@ -4804,6 +5104,7 @@ out:
4804 return ret; 5104 return ret;
4805 5105
4806out_register_late_subsys: 5106out_register_late_subsys:
5107 rtnl_unregister_all(PF_INET6);
4807 unregister_pernet_subsys(&ip6_route_net_late_ops); 5108 unregister_pernet_subsys(&ip6_route_net_late_ops);
4808fib6_rules_init: 5109fib6_rules_init:
4809 fib6_rules_cleanup(); 5110 fib6_rules_cleanup();
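The registration hunk above replaces the ORed __rtnl_register() calls with per-call error checking via rtnl_register_module(), and any failure unwinds every handler for the family in one rtnl_unregister_all() call instead of tracking which registrations succeeded. A sketch of that init/unwind shape with stand-in functions:

#include <stdio.h>

static int register_handler(const char *name)
{
	printf("registered %s\n", name);
	return 0;	/* pretend success */
}

static void unregister_all(void)
{
	printf("unregistered all handlers for the family\n");
}

static int init(void)
{
	int ret;

	ret = register_handler("RTM_NEWROUTE");
	if (ret < 0)
		goto err;
	ret = register_handler("RTM_DELROUTE");
	if (ret < 0)
		goto err;
	ret = register_handler("RTM_GETROUTE");
	if (ret < 0)
		goto err;
	return 0;
err:
	unregister_all();	/* mirrors rtnl_unregister_all(PF_INET6) */
	return ret;
}

int main(void)
{
	return init();
}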
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c81407770956..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
306 struct seg6_hmac_info *hinfo; 306 struct seg6_hmac_info *hinfo;
307 int ret; 307 int ret;
308 308
309 ret = rhashtable_walk_start(iter); 309 rhashtable_walk_start(iter);
310 if (ret && ret != -EAGAIN)
311 goto done;
312 310
313 for (;;) { 311 for (;;) {
314 hinfo = rhashtable_walk_next(iter); 312 hinfo = rhashtable_walk_next(iter);
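The hunk above tracks an rhashtable API change: rhashtable_walk_start() no longer reports a start-time error, so resizes surface only from rhashtable_walk_next() (as ERR_PTR(-EAGAIN), which walkers skip past). A toy model of the resulting loop shape, with stand-in types rather than the real rhashtable API:

#include <stdio.h>
#include <stdint.h>

#define WALK_AGAIN ((void *)(intptr_t)-1)	/* stand-in for ERR_PTR(-EAGAIN) */

static void *walk_next(int *i)
{
	static char *items[] = { "a", NULL /* resize point */, "b" };

	if (*i >= 3)
		return NULL;		/* iteration complete */
	if (!items[*i]) {
		(*i)++;
		return WALK_AGAIN;	/* table resized mid-walk */
	}
	return items[(*i)++];
}

int main(void)
{
	int i = 0;
	void *p;

	while ((p = walk_next(&i)) != NULL) {
		if (p == WALK_AGAIN)
			continue;	/* just keep walking */
		printf("item: %s\n", (const char *)p);
	}
	return 0;
}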
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 825b8e01f947..ba3767ef5e93 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action)
501 struct seg6_action_desc *desc; 501 struct seg6_action_desc *desc;
502 int i, count; 502 int i, count;
503 503
504 count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc); 504 count = ARRAY_SIZE(seg6_action_table);
505 for (i = 0; i < count; i++) { 505 for (i = 0; i < count; i++) {
506 desc = &seg6_action_table[i]; 506 desc = &seg6_action_table[i];
507 if (desc->action == action) 507 if (desc->action == action)
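
ARRAY_SIZE() replaces the open-coded sizeof division and is both shorter and safer: the in-tree macro additionally fails to compile if its argument decays to a pointer (via __must_be_array()). Stripped of that type check, it is simply:

	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

	/* e.g. */
	count = ARRAY_SIZE(seg6_action_table);
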
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7178476b3d2f..412139f4eccd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -176,8 +176,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
176 /* If interface is set while binding, indices 176 /* If interface is set while binding, indices
177 * must coincide. 177 * must coincide.
178 */ 178 */
179 if (sk->sk_bound_dev_if && 179 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
180 sk->sk_bound_dev_if != usin->sin6_scope_id)
181 return -EINVAL; 180 return -EINVAL;
182 181
183 sk->sk_bound_dev_if = usin->sin6_scope_id; 182 sk->sk_bound_dev_if = usin->sin6_scope_id;
@@ -943,7 +942,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
943 942
944 if (sk) { 943 if (sk) {
945 oif = sk->sk_bound_dev_if; 944 oif = sk->sk_bound_dev_if;
946 trace_tcp_send_reset(sk, skb); 945 if (sk_fullsock(sk))
946 trace_tcp_send_reset(sk, skb);
947 } 947 }
948 948
949 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); 949 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
@@ -1795,7 +1795,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1795 timer_expires = jiffies; 1795 timer_expires = jiffies;
1796 } 1796 }
1797 1797
1798 state = sk_state_load(sp); 1798 state = inet_sk_state_load(sp);
1799 if (state == TCP_LISTEN) 1799 if (state == TCP_LISTEN)
1800 rx_queue = sp->sk_ack_backlog; 1800 rx_queue = sp->sk_ack_backlog;
1801 else 1801 else
@@ -1884,7 +1884,6 @@ out:
1884} 1884}
1885 1885
1886static const struct file_operations tcp6_afinfo_seq_fops = { 1886static const struct file_operations tcp6_afinfo_seq_fops = {
1887 .owner = THIS_MODULE,
1888 .open = tcp_seq_open, 1887 .open = tcp_seq_open,
1889 .read = seq_read, 1888 .read = seq_read,
1890 .llseek = seq_lseek, 1889 .llseek = seq_lseek,
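
Two independent fixes in tcp_ipv6.c: the bound-device check in tcp_v6_connect() now goes through sk_dev_equal_l3scope(), so a socket bound to an L3 master (VRF) device also matches its slaves, and tcp_v6_send_reset() only fires the tcp_send_reset tracepoint for full sockets, since sk here may be a request or timewait mini-socket lacking the fields the tracepoint reads. The guard relies on sk_fullsock(), which is roughly:

	static inline bool sk_fullsock(const struct sock *sk)
	{
		/* everything except TIME_WAIT and NEW_SYN_RECV mini-sockets */
		return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
	}
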
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f30fa313bf2..52e3ea0e6f50 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net,
89 udp_ipv6_hash_secret + net_hash_mix(net)); 89 udp_ipv6_hash_secret + net_hash_mix(net));
90} 90}
91 91
92static u32 udp6_portaddr_hash(const struct net *net,
93 const struct in6_addr *addr6,
94 unsigned int port)
95{
96 unsigned int hash, mix = net_hash_mix(net);
97
98 if (ipv6_addr_any(addr6))
99 hash = jhash_1word(0, mix);
100 else if (ipv6_addr_v4mapped(addr6))
101 hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
102 else
103 hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
104
105 return hash ^ port;
106}
107
108int udp_v6_get_port(struct sock *sk, unsigned short snum) 92int udp_v6_get_port(struct sock *sk, unsigned short snum)
109{ 93{
110 unsigned int hash2_nulladdr = 94 unsigned int hash2_nulladdr =
111 udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); 95 ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
112 unsigned int hash2_partial = 96 unsigned int hash2_partial =
113 udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); 97 ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
114 98
115 /* precompute partial secondary hash */ 99 /* precompute partial secondary hash */
116 udp_sk(sk)->udp_portaddr_hash = hash2_partial; 100 udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
119 103
120static void udp_v6_rehash(struct sock *sk) 104static void udp_v6_rehash(struct sock *sk)
121{ 105{
122 u16 new_hash = udp6_portaddr_hash(sock_net(sk), 106 u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
123 &sk->sk_v6_rcv_saddr, 107 &sk->sk_v6_rcv_saddr,
124 inet_sk(sk)->inet_num); 108 inet_sk(sk)->inet_num);
125 109
@@ -184,7 +168,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
184 struct udp_hslot *hslot2, struct sk_buff *skb) 168 struct udp_hslot *hslot2, struct sk_buff *skb)
185{ 169{
186 struct sock *sk, *result; 170 struct sock *sk, *result;
187 int score, badness, matches = 0, reuseport = 0; 171 int score, badness;
188 u32 hash = 0; 172 u32 hash = 0;
189 173
190 result = NULL; 174 result = NULL;
@@ -193,8 +177,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
193 score = compute_score(sk, net, saddr, sport, 177 score = compute_score(sk, net, saddr, sport,
194 daddr, hnum, dif, sdif, exact_dif); 178 daddr, hnum, dif, sdif, exact_dif);
195 if (score > badness) { 179 if (score > badness) {
196 reuseport = sk->sk_reuseport; 180 if (sk->sk_reuseport) {
197 if (reuseport) {
198 hash = udp6_ehashfn(net, daddr, hnum, 181 hash = udp6_ehashfn(net, daddr, hnum,
199 saddr, sport); 182 saddr, sport);
200 183
@@ -202,15 +185,9 @@ static struct sock *udp6_lib_lookup2(struct net *net,
202 sizeof(struct udphdr)); 185 sizeof(struct udphdr));
203 if (result) 186 if (result)
204 return result; 187 return result;
205 matches = 1;
206 } 188 }
207 result = sk; 189 result = sk;
208 badness = score; 190 badness = score;
209 } else if (score == badness && reuseport) {
210 matches++;
211 if (reciprocal_scale(hash, matches) == 0)
212 result = sk;
213 hash = next_pseudo_random32(hash);
214 } 191 }
215 } 192 }
216 return result; 193 return result;
@@ -228,11 +205,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
228 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); 205 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
229 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; 206 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
230 bool exact_dif = udp6_lib_exact_dif_match(net, skb); 207 bool exact_dif = udp6_lib_exact_dif_match(net, skb);
231 int score, badness, matches = 0, reuseport = 0; 208 int score, badness;
232 u32 hash = 0; 209 u32 hash = 0;
233 210
234 if (hslot->count > 10) { 211 if (hslot->count > 10) {
235 hash2 = udp6_portaddr_hash(net, daddr, hnum); 212 hash2 = ipv6_portaddr_hash(net, daddr, hnum);
236 slot2 = hash2 & udptable->mask; 213 slot2 = hash2 & udptable->mask;
237 hslot2 = &udptable->hash2[slot2]; 214 hslot2 = &udptable->hash2[slot2];
238 if (hslot->count < hslot2->count) 215 if (hslot->count < hslot2->count)
@@ -243,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
243 hslot2, skb); 220 hslot2, skb);
244 if (!result) { 221 if (!result) {
245 unsigned int old_slot2 = slot2; 222 unsigned int old_slot2 = slot2;
246 hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); 223 hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
247 slot2 = hash2 & udptable->mask; 224 slot2 = hash2 & udptable->mask;
248 /* avoid searching the same slot again. */ 225 /* avoid searching the same slot again. */
249 if (unlikely(slot2 == old_slot2)) 226 if (unlikely(slot2 == old_slot2))
@@ -267,23 +244,16 @@ begin:
267 score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, 244 score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
268 sdif, exact_dif); 245 sdif, exact_dif);
269 if (score > badness) { 246 if (score > badness) {
270 reuseport = sk->sk_reuseport; 247 if (sk->sk_reuseport) {
271 if (reuseport) {
272 hash = udp6_ehashfn(net, daddr, hnum, 248 hash = udp6_ehashfn(net, daddr, hnum,
273 saddr, sport); 249 saddr, sport);
274 result = reuseport_select_sock(sk, hash, skb, 250 result = reuseport_select_sock(sk, hash, skb,
275 sizeof(struct udphdr)); 251 sizeof(struct udphdr));
276 if (result) 252 if (result)
277 return result; 253 return result;
278 matches = 1;
279 } 254 }
280 result = sk; 255 result = sk;
281 badness = score; 256 badness = score;
282 } else if (score == badness && reuseport) {
283 matches++;
284 if (reciprocal_scale(hash, matches) == 0)
285 result = sk;
286 hash = next_pseudo_random32(hash);
287 } 257 }
288 } 258 }
289 return result; 259 return result;
@@ -719,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
719 struct sk_buff *nskb; 689 struct sk_buff *nskb;
720 690
721 if (use_hash2) { 691 if (use_hash2) {
722 hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & 692 hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
723 udptable->mask; 693 udptable->mask;
724 hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask; 694 hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
725start_lookup: 695start_lookup:
726 hslot = &udptable->hash2[hash2]; 696 hslot = &udptable->hash2[hash2];
727 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); 697 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -909,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
909 int dif, int sdif) 879 int dif, int sdif)
910{ 880{
911 unsigned short hnum = ntohs(loc_port); 881 unsigned short hnum = ntohs(loc_port);
912 unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); 882 unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
913 unsigned int slot2 = hash2 & udp_table.mask; 883 unsigned int slot2 = hash2 & udp_table.mask;
914 struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; 884 struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
915 const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); 885 const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
@@ -1509,7 +1479,6 @@ int udp6_seq_show(struct seq_file *seq, void *v)
1509} 1479}
1510 1480
1511static const struct file_operations udp6_afinfo_seq_fops = { 1481static const struct file_operations udp6_afinfo_seq_fops = {
1512 .owner = THIS_MODULE,
1513 .open = udp_seq_open, 1482 .open = udp_seq_open,
1514 .read = seq_read, 1483 .read = seq_read,
1515 .llseek = seq_lseek, 1484 .llseek = seq_lseek,
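
The udp.c hunks make two cleanups: the local udp6_portaddr_hash() is dropped in favour of the shared ipv6_portaddr_hash() helper, and both socket-lookup loops stop tracking matches/reciprocal_scale() fallbacks, because reuseport_select_sock() already picks a socket from the group by hash once the group exists, making the manual sampling dead code. The surviving scoring step, paraphrasing the hunks above (loop iterator elided):

	score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
			      sdif, exact_dif);
	if (score > badness) {
		if (sk->sk_reuseport) {
			hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
			result = reuseport_select_sock(sk, hash, skb,
						       sizeof(struct udphdr));
			if (result)
				return result;	/* the group chose for us */
		}
		result = sk;	/* best non-grouped candidate so far */
		badness = score;
	}
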
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 2784cc363f2b..14ae32bb1f3d 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -94,7 +94,6 @@ void udplitev6_exit(void)
94#ifdef CONFIG_PROC_FS 94#ifdef CONFIG_PROC_FS
95 95
96static const struct file_operations udplite6_afinfo_seq_fops = { 96static const struct file_operations udplite6_afinfo_seq_fops = {
97 .owner = THIS_MODULE,
98 .open = udp_seq_open, 97 .open = udp_seq_open,
99 .read = seq_read, 98 .read = seq_read,
100 .llseek = seq_lseek, 99 .llseek = seq_lseek,
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index dc93002ff9d1..bb935a3b7fea 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -59,7 +59,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
59 if (x->props.flags & XFRM_STATE_NOECN) 59 if (x->props.flags & XFRM_STATE_NOECN)
60 dsfield &= ~INET_ECN_MASK; 60 dsfield &= ~INET_ECN_MASK;
61 ipv6_change_dsfield(top_iph, 0, dsfield); 61 ipv6_change_dsfield(top_iph, 0, dsfield);
62 top_iph->hop_limit = ip6_dst_hoplimit(dst->child); 62 top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
63 top_iph->saddr = *(struct in6_addr *)&x->props.saddr; 63 top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
64 top_iph->daddr = *(struct in6_addr *)&x->id.daddr; 64 top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
65 return 0; 65 return 0;
@@ -106,17 +106,14 @@ static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
106{ 106{
107 __skb_push(skb, skb->mac_len); 107 __skb_push(skb, skb->mac_len);
108 return skb_mac_gso_segment(skb, features); 108 return skb_mac_gso_segment(skb, features);
109
110} 109}
111 110
112static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) 111static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
113{ 112{
114 struct xfrm_offload *xo = xfrm_offload(skb); 113 struct xfrm_offload *xo = xfrm_offload(skb);
115 114
116 if (xo->flags & XFRM_GSO_SEGMENT) { 115 if (xo->flags & XFRM_GSO_SEGMENT)
117 skb->network_header = skb->network_header - x->props.header_len;
118 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); 116 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
119 }
120 117
121 skb_reset_mac_len(skb); 118 skb_reset_mac_len(skb);
122 pskb_pull(skb, skb->mac_len + x->props.header_len); 119 pskb_pull(skb, skb->mac_len + x->props.header_len);
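
As in the route.c hunk earlier, direct dst->child dereferences are replaced by an accessor now that the child pointer lives in struct xfrm_dst. A rough sketch of xfrm_dst_child() (hedged; the in-tree version in include/net/xfrm.h also treats DST_XFRM_QUEUE dsts as bundles):

	static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
	{
	#ifdef CONFIG_XFRM
		if (dst->xfrm)	/* only xfrm bundles have a child */
			return ((const struct xfrm_dst *)dst)->child;
	#endif
		return NULL;
	}
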
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 885ade234a49..09fb44ee3b45 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -265,7 +265,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
265 in6_dev_put(xdst->u.rt6.rt6i_idev); 265 in6_dev_put(xdst->u.rt6.rt6i_idev);
266 xdst->u.rt6.rt6i_idev = loopback_idev; 266 xdst->u.rt6.rt6i_idev = loopback_idev;
267 in6_dev_hold(loopback_idev); 267 in6_dev_hold(loopback_idev);
268 xdst = (struct xfrm_dst *)xdst->u.dst.child; 268 xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst);
269 } while (xdst->u.dst.xfrm); 269 } while (xdst->u.dst.xfrm);
270 270
271 __in6_dev_put(loopback_idev); 271 __in6_dev_put(loopback_idev);
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
deleted file mode 100644
index e9ad0062fbb6..000000000000
--- a/net/ipx/Kconfig
+++ /dev/null
@@ -1,60 +0,0 @@
1#
2# IPX configuration
3#
4config IPX
5 tristate "The IPX protocol"
6 select LLC
7 ---help---
8 This is support for the Novell networking protocol, IPX, commonly
9 used for local networks of Windows machines. You need it if you
10 want to access Novell NetWare file or print servers using the Linux
11 Novell client ncpfs (available from
12 <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
13 within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
14 available from <http://www.tldp.org/docs.html#howto>). In order
15 to do the former, you'll also have to say Y to "NCP file system
16 support", below.
17
18 IPX is similar in scope to IP, while SPX, which runs on top of IPX,
19 is similar to TCP.
20
21 To turn your Linux box into a fully featured NetWare file server and
22 IPX router, say Y here and fetch either lwared from
23 <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
24 mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
25 information, read the IPX-HOWTO available from
26 <http://www.tldp.org/docs.html#howto>.
27
28 The IPX driver would enlarge your kernel by about 16 KB. To compile
29 this driver as a module, choose M here: the module will be called ipx.
30 Unless you want to integrate your Linux box with a local Novell
31 network, say N.
32
33config IPX_INTERN
34 bool "IPX: Full internal IPX network"
35 depends on IPX
36 ---help---
37 Every IPX network has an address that identifies it. Sometimes it is
38 useful to give an IPX "network" address to your Linux box as well
39 (for example if your box is acting as a file server for different
40 IPX networks: it will then be accessible from everywhere using the
41 same address). The way this is done is to create a virtual internal
42 "network" inside your box and to assign an IPX address to this
43 network. Say Y here if you want to do this; read the IPX-HOWTO at
44 <http://www.tldp.org/docs.html#howto> for details.
45
46 The full internal IPX network enables you to allocate sockets on
47 different virtual nodes of the internal network. This is done by
48 evaluating the field sipx_node of the socket address given to the
49 bind call. So applications should always initialize the node field
50 to 0 when binding a socket on the primary network. In this case the
51 socket is assigned the default node that has been given to the
52 kernel when the internal network was created. By enabling the full
53 internal IPX network the cross-forwarding of packets targeted at
54 'special' sockets to sockets listening on the primary network is
55 disabled. This might break existing applications, especially RIP/SAP
56 daemons. A RIP/SAP daemon that works well with the full internal net
57 can be found on <ftp://ftp.gwdg.de/pub/linux/misc/ncpfs/>.
58
59 If you don't know what you are doing, say N.
60
diff --git a/net/ipx/Makefile b/net/ipx/Makefile
deleted file mode 100644
index 440fafa9fd07..000000000000
--- a/net/ipx/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1#
2# Makefile for the Linux IPX layer.
3#
4
5obj-$(CONFIG_IPX) += ipx.o
6
7ipx-y := af_ipx.o ipx_route.o ipx_proc.o pe2.o
8ipx-$(CONFIG_SYSCTL) += sysctl_net_ipx.o
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
deleted file mode 100644
index d21a9d128d3e..000000000000
--- a/net/ipx/af_ipx.c
+++ /dev/null
@@ -1,2084 +0,0 @@
1/*
2 * Implements an IPX socket layer.
3 *
4 * This code is derived from work by
5 * Ross Biro : Writing the original IP stack
6 * Fred Van Kempen : Tidying up the TCP/IP
7 *
8 * Many thanks go to Keith Baker, Institute For Industrial Information
9 * Technology Ltd, Swansea University for allowing me to work on this
10 * in my own time even though it was in some ways related to commercial
11 * work I am currently employed to do there.
12 *
13 * All the material in this file is subject to the Gnu license version 2.
14 * Neither Alan Cox nor the Swansea University Computer Society admit
15 * liability nor provide warranty for any of this software. This material
16 * is provided as is and at no charge.
17 *
18 * Portions Copyright (c) 2000-2003 Conectiva, Inc. <acme@conectiva.com.br>
19 * Neither Arnaldo Carvalho de Melo nor Conectiva, Inc. admit liability nor
20 * provide warranty for any of this software. This material is provided
21 * "AS-IS" and at no charge.
22 *
23 * Portions Copyright (c) 1995 Caldera, Inc. <greg@caldera.com>
24 * Neither Greg Page nor Caldera, Inc. admit liability nor provide
25 * warranty for any of this software. This material is provided
26 * "AS-IS" and at no charge.
27 *
28 * See net/ipx/ChangeLog.
29 */
30
31#include <linux/capability.h>
32#include <linux/errno.h>
33#include <linux/if_arp.h>
34#include <linux/if_ether.h>
35#include <linux/init.h>
36#include <linux/ipx.h>
37#include <linux/kernel.h>
38#include <linux/list.h>
39#include <linux/module.h>
40#include <linux/net.h>
41#include <linux/netdevice.h>
42#include <linux/uio.h>
43#include <linux/slab.h>
44#include <linux/skbuff.h>
45#include <linux/socket.h>
46#include <linux/sockios.h>
47#include <linux/string.h>
48#include <linux/types.h>
49#include <linux/termios.h>
50
51#include <net/ipx.h>
52#include <net/p8022.h>
53#include <net/psnap.h>
54#include <net/sock.h>
55#include <net/datalink.h>
56#include <net/tcp_states.h>
57#include <net/net_namespace.h>
58
59#include <linux/uaccess.h>
60
61/* Configuration Variables */
62static unsigned char ipxcfg_max_hops = 16;
63static char ipxcfg_auto_select_primary;
64static char ipxcfg_auto_create_interfaces;
65int sysctl_ipx_pprop_broadcasting = 1;
66
67/* Global Variables */
68static struct datalink_proto *p8022_datalink;
69static struct datalink_proto *pEII_datalink;
70static struct datalink_proto *p8023_datalink;
71static struct datalink_proto *pSNAP_datalink;
72
73static const struct proto_ops ipx_dgram_ops;
74
75LIST_HEAD(ipx_interfaces);
76DEFINE_SPINLOCK(ipx_interfaces_lock);
77
78struct ipx_interface *ipx_primary_net;
79struct ipx_interface *ipx_internal_net;
80
81struct ipx_interface *ipx_interfaces_head(void)
82{
83 struct ipx_interface *rc = NULL;
84
85 if (!list_empty(&ipx_interfaces))
86 rc = list_entry(ipx_interfaces.next,
87 struct ipx_interface, node);
88 return rc;
89}
90
91static void ipxcfg_set_auto_select(char val)
92{
93 ipxcfg_auto_select_primary = val;
94 if (val && !ipx_primary_net)
95 ipx_primary_net = ipx_interfaces_head();
96}
97
98static int ipxcfg_get_config_data(struct ipx_config_data __user *arg)
99{
100 struct ipx_config_data vals;
101
102 vals.ipxcfg_auto_create_interfaces = ipxcfg_auto_create_interfaces;
103 vals.ipxcfg_auto_select_primary = ipxcfg_auto_select_primary;
104
105 return copy_to_user(arg, &vals, sizeof(vals)) ? -EFAULT : 0;
106}
107
108/*
109 * Note: Sockets may not be removed _during_ an interrupt or inet_bh
110 * handler using this technique. They can be added although we do not
111 * use this facility.
112 */
113
114static void ipx_remove_socket(struct sock *sk)
115{
116 /* Determine interface with which socket is associated */
117 struct ipx_interface *intrfc = ipx_sk(sk)->intrfc;
118
119 if (!intrfc)
120 goto out;
121
122 ipxitf_hold(intrfc);
123 spin_lock_bh(&intrfc->if_sklist_lock);
124 sk_del_node_init(sk);
125 spin_unlock_bh(&intrfc->if_sklist_lock);
126 ipxitf_put(intrfc);
127out:
128 return;
129}
130
131static void ipx_destroy_socket(struct sock *sk)
132{
133 ipx_remove_socket(sk);
134 skb_queue_purge(&sk->sk_receive_queue);
135 sk_refcnt_debug_dec(sk);
136}
137
138/*
139 * The following code is used to support IPX Interfaces (IPXITF). An
140 * IPX interface is defined by a physical device and a frame type.
141 */
142
143/* ipxitf_clear_primary_net has to be called with ipx_interfaces_lock held */
144
145static void ipxitf_clear_primary_net(void)
146{
147 ipx_primary_net = NULL;
148 if (ipxcfg_auto_select_primary)
149 ipx_primary_net = ipx_interfaces_head();
150}
151
152static struct ipx_interface *__ipxitf_find_using_phys(struct net_device *dev,
153 __be16 datalink)
154{
155 struct ipx_interface *i;
156
157 list_for_each_entry(i, &ipx_interfaces, node)
158 if (i->if_dev == dev && i->if_dlink_type == datalink)
159 goto out;
160 i = NULL;
161out:
162 return i;
163}
164
165static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev,
166 __be16 datalink)
167{
168 struct ipx_interface *i;
169
170 spin_lock_bh(&ipx_interfaces_lock);
171 i = __ipxitf_find_using_phys(dev, datalink);
172 if (i)
173 ipxitf_hold(i);
174 spin_unlock_bh(&ipx_interfaces_lock);
175 return i;
176}
177
178struct ipx_interface *ipxitf_find_using_net(__be32 net)
179{
180 struct ipx_interface *i;
181
182 spin_lock_bh(&ipx_interfaces_lock);
183 if (net) {
184 list_for_each_entry(i, &ipx_interfaces, node)
185 if (i->if_netnum == net)
186 goto hold;
187 i = NULL;
188 goto unlock;
189 }
190
191 i = ipx_primary_net;
192 if (i)
193hold:
194 ipxitf_hold(i);
195unlock:
196 spin_unlock_bh(&ipx_interfaces_lock);
197 return i;
198}
199
200/* Sockets are bound to a particular IPX interface. */
201static void ipxitf_insert_socket(struct ipx_interface *intrfc, struct sock *sk)
202{
203 ipxitf_hold(intrfc);
204 spin_lock_bh(&intrfc->if_sklist_lock);
205 ipx_sk(sk)->intrfc = intrfc;
206 sk_add_node(sk, &intrfc->if_sklist);
207 spin_unlock_bh(&intrfc->if_sklist_lock);
208 ipxitf_put(intrfc);
209}
210
211/* caller must hold intrfc->if_sklist_lock */
212static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc,
213 __be16 port)
214{
215 struct sock *s;
216
217 sk_for_each(s, &intrfc->if_sklist)
218 if (ipx_sk(s)->port == port)
219 goto found;
220 s = NULL;
221found:
222 return s;
223}
224
225/* caller must hold a reference to intrfc */
226static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc,
227 __be16 port)
228{
229 struct sock *s;
230
231 spin_lock_bh(&intrfc->if_sklist_lock);
232 s = __ipxitf_find_socket(intrfc, port);
233 if (s)
234 sock_hold(s);
235 spin_unlock_bh(&intrfc->if_sklist_lock);
236
237 return s;
238}
239
240#ifdef CONFIG_IPX_INTERN
241static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc,
242 unsigned char *ipx_node,
243 __be16 port)
244{
245 struct sock *s;
246
247 ipxitf_hold(intrfc);
248 spin_lock_bh(&intrfc->if_sklist_lock);
249
250 sk_for_each(s, &intrfc->if_sklist) {
251 struct ipx_sock *ipxs = ipx_sk(s);
252
253 if (ipxs->port == port &&
254 !memcmp(ipx_node, ipxs->node, IPX_NODE_LEN))
255 goto found;
256 }
257 s = NULL;
258found:
259 spin_unlock_bh(&intrfc->if_sklist_lock);
260 ipxitf_put(intrfc);
261 return s;
262}
263#endif
264
265static void __ipxitf_down(struct ipx_interface *intrfc)
266{
267 struct sock *s;
268 struct hlist_node *t;
269
270 /* Delete all routes associated with this interface */
271 ipxrtr_del_routes(intrfc);
272
273 spin_lock_bh(&intrfc->if_sklist_lock);
274 /* error sockets */
275 sk_for_each_safe(s, t, &intrfc->if_sklist) {
276 struct ipx_sock *ipxs = ipx_sk(s);
277
278 s->sk_err = ENOLINK;
279 s->sk_error_report(s);
280 ipxs->intrfc = NULL;
281 ipxs->port = 0;
282 sock_set_flag(s, SOCK_ZAPPED); /* Indicates it is no longer bound */
283 sk_del_node_init(s);
284 }
285 INIT_HLIST_HEAD(&intrfc->if_sklist);
286 spin_unlock_bh(&intrfc->if_sklist_lock);
287
288 /* remove this interface from list */
289 list_del(&intrfc->node);
290
291 /* remove this interface from *special* networks */
292 if (intrfc == ipx_primary_net)
293 ipxitf_clear_primary_net();
294 if (intrfc == ipx_internal_net)
295 ipx_internal_net = NULL;
296
297 if (intrfc->if_dev)
298 dev_put(intrfc->if_dev);
299 kfree(intrfc);
300}
301
302void ipxitf_down(struct ipx_interface *intrfc)
303{
304 spin_lock_bh(&ipx_interfaces_lock);
305 __ipxitf_down(intrfc);
306 spin_unlock_bh(&ipx_interfaces_lock);
307}
308
309static void __ipxitf_put(struct ipx_interface *intrfc)
310{
311 if (refcount_dec_and_test(&intrfc->refcnt))
312 __ipxitf_down(intrfc);
313}
314
315static int ipxitf_device_event(struct notifier_block *notifier,
316 unsigned long event, void *ptr)
317{
318 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
319 struct ipx_interface *i, *tmp;
320
321 if (!net_eq(dev_net(dev), &init_net))
322 return NOTIFY_DONE;
323
324 if (event != NETDEV_DOWN && event != NETDEV_UP)
325 goto out;
326
327 spin_lock_bh(&ipx_interfaces_lock);
328 list_for_each_entry_safe(i, tmp, &ipx_interfaces, node)
329 if (i->if_dev == dev) {
330 if (event == NETDEV_UP)
331 ipxitf_hold(i);
332 else
333 __ipxitf_put(i);
334 }
335 spin_unlock_bh(&ipx_interfaces_lock);
336out:
337 return NOTIFY_DONE;
338}
339
340
341static __exit void ipxitf_cleanup(void)
342{
343 struct ipx_interface *i, *tmp;
344
345 spin_lock_bh(&ipx_interfaces_lock);
346 list_for_each_entry_safe(i, tmp, &ipx_interfaces, node)
347 __ipxitf_put(i);
348 spin_unlock_bh(&ipx_interfaces_lock);
349}
350
351static void ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb)
352{
353 if (sock_queue_rcv_skb(sock, skb) < 0)
354 kfree_skb(skb);
355}
356
357/*
358 * On input skb->sk is NULL. Nobody is charged for the memory.
359 */
360
361/* caller must hold a reference to intrfc */
362
363#ifdef CONFIG_IPX_INTERN
364static int ipxitf_demux_socket(struct ipx_interface *intrfc,
365 struct sk_buff *skb, int copy)
366{
367 struct ipxhdr *ipx = ipx_hdr(skb);
368 int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node,
369 IPX_NODE_LEN);
370 struct sock *s;
371 int rc;
372
373 spin_lock_bh(&intrfc->if_sklist_lock);
374
375 sk_for_each(s, &intrfc->if_sklist) {
376 struct ipx_sock *ipxs = ipx_sk(s);
377
378 if (ipxs->port == ipx->ipx_dest.sock &&
379 (is_broadcast || !memcmp(ipx->ipx_dest.node,
380 ipxs->node, IPX_NODE_LEN))) {
381 /* We found a socket to which to send */
382 struct sk_buff *skb1;
383
384 if (copy) {
385 skb1 = skb_clone(skb, GFP_ATOMIC);
386 rc = -ENOMEM;
387 if (!skb1)
388 goto out;
389 } else {
390 skb1 = skb;
391 copy = 1; /* skb may only be used once */
392 }
393 ipxitf_def_skb_handler(s, skb1);
394
395 /* On an external interface, one socket can listen */
396 if (intrfc != ipx_internal_net)
397 break;
398 }
399 }
400
401 /* skb was solely for us, and we did not make a copy, so free it. */
402 if (!copy)
403 kfree_skb(skb);
404
405 rc = 0;
406out:
407 spin_unlock_bh(&intrfc->if_sklist_lock);
408 return rc;
409}
410#else
411static struct sock *ncp_connection_hack(struct ipx_interface *intrfc,
412 struct ipxhdr *ipx)
413{
414 /* The packet's target is a NCP connection handler. We want to hand it
415 * to the correct socket directly within the kernel, so that the
416 * mars_nwe packet distribution process does not have to do it. Here we
417 * only care about NCP and BURST packets.
418 *
419 * You might call this a hack, but believe me, you do not want a
420 * complete NCP layer in the kernel, and this is VERY fast as well. */
421 struct sock *sk = NULL;
422 int connection = 0;
423 u8 *ncphdr = (u8 *)(ipx + 1);
424
425 if (*ncphdr == 0x22 && *(ncphdr + 1) == 0x22) /* NCP request */
426 connection = (((int) *(ncphdr + 5)) << 8) | (int) *(ncphdr + 3);
427 else if (*ncphdr == 0x77 && *(ncphdr + 1) == 0x77) /* BURST packet */
428 connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8);
429
430 if (connection) {
431 /* Now we have to look for a special NCP connection handling
432 * socket. Only these sockets have ipx_ncp_conn != 0, set by
433 * SIOCIPXNCPCONN. */
434 spin_lock_bh(&intrfc->if_sklist_lock);
435 sk_for_each(sk, &intrfc->if_sklist)
436 if (ipx_sk(sk)->ipx_ncp_conn == connection) {
437 sock_hold(sk);
438 goto found;
439 }
440 sk = NULL;
441 found:
442 spin_unlock_bh(&intrfc->if_sklist_lock);
443 }
444 return sk;
445}
446
447static int ipxitf_demux_socket(struct ipx_interface *intrfc,
448 struct sk_buff *skb, int copy)
449{
450 struct ipxhdr *ipx = ipx_hdr(skb);
451 struct sock *sock1 = NULL, *sock2 = NULL;
452 struct sk_buff *skb1 = NULL, *skb2 = NULL;
453 int rc;
454
455 if (intrfc == ipx_primary_net && ntohs(ipx->ipx_dest.sock) == 0x451)
456 sock1 = ncp_connection_hack(intrfc, ipx);
457 if (!sock1)
458 /* No special socket found, forward the packet the normal way */
459 sock1 = ipxitf_find_socket(intrfc, ipx->ipx_dest.sock);
460
461 /*
462 * We need to check if there is a primary net and if
463 * this is addressed to one of the *SPECIAL* sockets because
464 * these need to be propagated to the primary net.
465 * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and
466 * 0x456(Diagnostic).
467 */
468
469 if (ipx_primary_net && intrfc != ipx_primary_net) {
470 const int dsock = ntohs(ipx->ipx_dest.sock);
471
472 if (dsock == 0x452 || dsock == 0x453 || dsock == 0x456)
473 /* The appropriate thing to do here is to dup the
474 * packet and route to the primary net interface via
475 * ipxitf_send; however, we'll cheat and just demux it
476 * here. */
477 sock2 = ipxitf_find_socket(ipx_primary_net,
478 ipx->ipx_dest.sock);
479 }
480
481 /*
482 * If there is nothing to do return. The kfree will cancel any charging.
483 */
484 rc = 0;
485 if (!sock1 && !sock2) {
486 if (!copy)
487 kfree_skb(skb);
488 goto out;
489 }
490
491 /*
492 * This next segment of code is a little awkward, but it sets it up
493 * so that the appropriate number of copies of the SKB are made and
494 * that skb1 and skb2 point to it (them) so that it (they) can be
495 * demuxed to sock1 and/or sock2. If we are unable to make enough
496 * copies, we do as much as is possible.
497 */
498
499 if (copy)
500 skb1 = skb_clone(skb, GFP_ATOMIC);
501 else
502 skb1 = skb;
503
504 rc = -ENOMEM;
505 if (!skb1)
506 goto out_put;
507
508 /* Do we need 2 SKBs? */
509 if (sock1 && sock2)
510 skb2 = skb_clone(skb1, GFP_ATOMIC);
511 else
512 skb2 = skb1;
513
514 if (sock1)
515 ipxitf_def_skb_handler(sock1, skb1);
516
517 if (!skb2)
518 goto out_put;
519
520 if (sock2)
521 ipxitf_def_skb_handler(sock2, skb2);
522
523 rc = 0;
524out_put:
525 if (sock1)
526 sock_put(sock1);
527 if (sock2)
528 sock_put(sock2);
529out:
530 return rc;
531}
532#endif /* CONFIG_IPX_INTERN */
533
534static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
535 struct sk_buff *skb)
536{
537 struct sk_buff *skb2;
538 int in_offset = (unsigned char *)ipx_hdr(skb) - skb->head;
539 int out_offset = intrfc->if_ipx_offset;
540 int len;
541
542 /* Hopefully, most cases */
543 if (in_offset >= out_offset)
544 return skb;
545
546 /* Need new SKB */
547 len = skb->len + out_offset;
548 skb2 = alloc_skb(len, GFP_ATOMIC);
549 if (skb2) {
550 skb_reserve(skb2, out_offset);
551 skb_reset_network_header(skb2);
552 skb_reset_transport_header(skb2);
553 skb_put(skb2, skb->len);
554 memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
555 memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
556 }
557 kfree_skb(skb);
558 return skb2;
559}
560
561/* caller must hold a reference to intrfc and the skb has to be unshared */
562int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node)
563{
564 struct ipxhdr *ipx = ipx_hdr(skb);
565 struct net_device *dev = intrfc->if_dev;
566 struct datalink_proto *dl = intrfc->if_dlink;
567 char dest_node[IPX_NODE_LEN];
568 int send_to_wire = 1;
569 int addr_len;
570
571 ipx->ipx_tctrl = IPX_SKB_CB(skb)->ipx_tctrl;
572 ipx->ipx_dest.net = IPX_SKB_CB(skb)->ipx_dest_net;
573 ipx->ipx_source.net = IPX_SKB_CB(skb)->ipx_source_net;
574
575 /* see if we need to include the netnum in the route list */
576 if (IPX_SKB_CB(skb)->last_hop.index >= 0) {
577 __be32 *last_hop = (__be32 *)(((u8 *) skb->data) +
578 sizeof(struct ipxhdr) +
579 IPX_SKB_CB(skb)->last_hop.index *
580 sizeof(__be32));
581 *last_hop = IPX_SKB_CB(skb)->last_hop.netnum;
582 IPX_SKB_CB(skb)->last_hop.index = -1;
583 }
584
585 /*
586 * We need to know how many skbuffs it will take to send out this
587 * packet to avoid unnecessary copies.
588 */
589
590 if (!dl || !dev || dev->flags & IFF_LOOPBACK)
591 send_to_wire = 0; /* No non looped */
592
593 /*
594 * See if this should be demuxed to sockets on this interface
595 *
596 * We want to ensure the original was eaten or that we only use
597 * up clones.
598 */
599
600 if (ipx->ipx_dest.net == intrfc->if_netnum) {
601 /*
602 * To our own node, loop and free the original.
 603 * The internal net will receive on all node addresses.
604 */
605 if (intrfc == ipx_internal_net ||
606 !memcmp(intrfc->if_node, node, IPX_NODE_LEN)) {
607 /* Don't charge sender */
608 skb_orphan(skb);
609
610 /* Will charge receiver */
611 return ipxitf_demux_socket(intrfc, skb, 0);
612 }
613
614 /* Broadcast, loop and possibly keep to send on. */
615 if (!memcmp(ipx_broadcast_node, node, IPX_NODE_LEN)) {
616 if (!send_to_wire)
617 skb_orphan(skb);
618 ipxitf_demux_socket(intrfc, skb, send_to_wire);
619 if (!send_to_wire)
620 goto out;
621 }
622 }
623
624 /*
 625 * If the originating net is not equal to our net, this packet is being routed.
 626 * We are still charging the sender, which is right: the driver
 627 * free will handle this fairly.
628 */
629 if (ipx->ipx_source.net != intrfc->if_netnum) {
630 /*
631 * Unshare the buffer before modifying the count in
632 * case it's a flood or tcpdump
633 */
634 skb = skb_unshare(skb, GFP_ATOMIC);
635 if (!skb)
636 goto out;
637 if (++ipx->ipx_tctrl > ipxcfg_max_hops)
638 send_to_wire = 0;
639 }
640
641 if (!send_to_wire) {
642 kfree_skb(skb);
643 goto out;
644 }
645
646 /* Determine the appropriate hardware address */
647 addr_len = dev->addr_len;
648 if (!memcmp(ipx_broadcast_node, node, IPX_NODE_LEN))
649 memcpy(dest_node, dev->broadcast, addr_len);
650 else
651 memcpy(dest_node, &(node[IPX_NODE_LEN-addr_len]), addr_len);
652
653 /* Make any compensation for differing physical/data link size */
654 skb = ipxitf_adjust_skbuff(intrfc, skb);
655 if (!skb)
656 goto out;
657
658 /* set up data link and physical headers */
659 skb->dev = dev;
660 skb->protocol = htons(ETH_P_IPX);
661
662 /* Send it out */
663 dl->request(dl, skb, dest_node);
664out:
665 return 0;
666}
667
668static int ipxitf_add_local_route(struct ipx_interface *intrfc)
669{
670 return ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL);
671}
672
673static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
674 struct sk_buff *skb);
675static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb);
676
677static int ipxitf_rcv(struct ipx_interface *intrfc, struct sk_buff *skb)
678{
679 struct ipxhdr *ipx = ipx_hdr(skb);
680 int rc = 0;
681
682 ipxitf_hold(intrfc);
683
684 /* See if we should update our network number */
685 if (!intrfc->if_netnum) /* net number of intrfc not known yet */
686 ipxitf_discover_netnum(intrfc, skb);
687
688 IPX_SKB_CB(skb)->last_hop.index = -1;
689 if (ipx->ipx_type == IPX_TYPE_PPROP) {
690 rc = ipxitf_pprop(intrfc, skb);
691 if (rc)
692 goto out_free_skb;
693 }
694
695 /* local processing follows */
696 if (!IPX_SKB_CB(skb)->ipx_dest_net)
697 IPX_SKB_CB(skb)->ipx_dest_net = intrfc->if_netnum;
698 if (!IPX_SKB_CB(skb)->ipx_source_net)
699 IPX_SKB_CB(skb)->ipx_source_net = intrfc->if_netnum;
700
701 /* it doesn't make sense to route a pprop packet, there's no meaning
702 * in the ipx_dest_net for such packets */
703 if (ipx->ipx_type != IPX_TYPE_PPROP &&
704 intrfc->if_netnum != IPX_SKB_CB(skb)->ipx_dest_net) {
705 /* We only route point-to-point packets. */
706 if (skb->pkt_type == PACKET_HOST) {
707 skb = skb_unshare(skb, GFP_ATOMIC);
708 if (skb)
709 rc = ipxrtr_route_skb(skb);
710 goto out_intrfc;
711 }
712
713 goto out_free_skb;
714 }
715
716 /* see if we should keep it */
717 if (!memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) ||
718 !memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN)) {
719 rc = ipxitf_demux_socket(intrfc, skb, 0);
720 goto out_intrfc;
721 }
722
723 /* we couldn't pawn it off so unload it */
724out_free_skb:
725 kfree_skb(skb);
726out_intrfc:
727 ipxitf_put(intrfc);
728 return rc;
729}
730
731static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
732 struct sk_buff *skb)
733{
734 const struct ipx_cb *cb = IPX_SKB_CB(skb);
735
736 /* see if this is an intra packet: source_net == dest_net */
737 if (cb->ipx_source_net == cb->ipx_dest_net && cb->ipx_source_net) {
738 struct ipx_interface *i =
739 ipxitf_find_using_net(cb->ipx_source_net);
740 /* NB: NetWare servers lie about their hop count so we
741 * dropped the test based on it. This is the best way
742 * to determine this is a 0 hop count packet. */
743 if (!i) {
744 intrfc->if_netnum = cb->ipx_source_net;
745 ipxitf_add_local_route(intrfc);
746 } else {
747 printk(KERN_WARNING "IPX: Network number collision "
748 "%lx\n %s %s and %s %s\n",
749 (unsigned long) ntohl(cb->ipx_source_net),
750 ipx_device_name(i),
751 ipx_frame_name(i->if_dlink_type),
752 ipx_device_name(intrfc),
753 ipx_frame_name(intrfc->if_dlink_type));
754 ipxitf_put(i);
755 }
756 }
757}
758
759/**
760 * ipxitf_pprop - Process packet propagation IPX packet type 0x14, used for
761 * NetBIOS broadcasts
762 * @intrfc: IPX interface receiving this packet
763 * @skb: Received packet
764 *
 765 * Checks whether the packet is valid: if it has more than %IPX_MAX_PPROP_HOPS
 766 * hops, or is smaller than an IPX header plus the room for %IPX_MAX_PPROP_HOPS
 767 * hops, we drop it without even processing it locally; if it has exactly %IPX_MAX_PPROP_HOPS we
768 * don't broadcast it, but process it locally. See chapter 5 of Novell's "IPX
769 * RIP and SAP Router Specification", Part Number 107-000029-001.
770 *
771 * If it is valid, check if we have pprop broadcasting enabled by the user,
772 * if not, just return zero for local processing.
773 *
774 * If it is enabled check the packet and don't broadcast it if we have already
775 * seen this packet.
776 *
777 * Broadcast: send it to the interfaces that aren't on the packet visited nets
778 * array, just after the IPX header.
779 *
780 * Returns -EINVAL for invalid packets, so that the calling function drops
781 * the packet without local processing. 0 if packet is to be locally processed.
782 */
783static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb)
784{
785 struct ipxhdr *ipx = ipx_hdr(skb);
786 int i, rc = -EINVAL;
787 struct ipx_interface *ifcs;
788 char *c;
789 __be32 *l;
790
791 /* Illegal packet - too many hops or too short */
792 /* We decide to throw it away: no broadcasting, no local processing.
793 * NetBIOS unaware implementations route them as normal packets -
794 * tctrl <= 15, any data payload... */
795 if (IPX_SKB_CB(skb)->ipx_tctrl > IPX_MAX_PPROP_HOPS ||
796 ntohs(ipx->ipx_pktsize) < sizeof(struct ipxhdr) +
797 IPX_MAX_PPROP_HOPS * sizeof(u32))
798 goto out;
799 /* are we broadcasting this damn thing? */
800 rc = 0;
801 if (!sysctl_ipx_pprop_broadcasting)
802 goto out;
 803 /* We do not broadcast the packet on the IPX_MAX_PPROP_HOPS hop, but we
 804 * do process it locally. All previous hops broadcast it and processed it
 805 * locally. */
806 if (IPX_SKB_CB(skb)->ipx_tctrl == IPX_MAX_PPROP_HOPS)
807 goto out;
808
809 c = ((u8 *) ipx) + sizeof(struct ipxhdr);
810 l = (__be32 *) c;
811
812 /* Don't broadcast packet if already seen this net */
813 for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
814 if (*l++ == intrfc->if_netnum)
815 goto out;
816
817 /* < IPX_MAX_PPROP_HOPS hops && input interface not in list. Save the
818 * position where we will insert recvd netnum into list, later on,
819 * in ipxitf_send */
820 IPX_SKB_CB(skb)->last_hop.index = i;
821 IPX_SKB_CB(skb)->last_hop.netnum = intrfc->if_netnum;
822 /* xmit on all other interfaces... */
823 spin_lock_bh(&ipx_interfaces_lock);
824 list_for_each_entry(ifcs, &ipx_interfaces, node) {
825 /* Except unconfigured interfaces */
826 if (!ifcs->if_netnum)
827 continue;
828
829 /* That aren't in the list */
830 if (ifcs == intrfc)
831 continue;
832 l = (__be32 *) c;
833 /* don't consider the last entry in the packet list,
834 * it is our netnum, and it is not there yet */
835 for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
836 if (ifcs->if_netnum == *l++)
837 break;
838 if (i == IPX_SKB_CB(skb)->ipx_tctrl) {
839 struct sk_buff *s = skb_copy(skb, GFP_ATOMIC);
840
841 if (s) {
842 IPX_SKB_CB(s)->ipx_dest_net = ifcs->if_netnum;
843 ipxrtr_route_skb(s);
844 }
845 }
846 }
847 spin_unlock_bh(&ipx_interfaces_lock);
848out:
849 return rc;
850}
851
852static void ipxitf_insert(struct ipx_interface *intrfc)
853{
854 spin_lock_bh(&ipx_interfaces_lock);
855 list_add_tail(&intrfc->node, &ipx_interfaces);
856 spin_unlock_bh(&ipx_interfaces_lock);
857
858 if (ipxcfg_auto_select_primary && !ipx_primary_net)
859 ipx_primary_net = intrfc;
860}
861
862static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __be32 netnum,
863 __be16 dlink_type,
864 struct datalink_proto *dlink,
865 unsigned char internal,
866 int ipx_offset)
867{
868 struct ipx_interface *intrfc = kmalloc(sizeof(*intrfc), GFP_ATOMIC);
869
870 if (intrfc) {
871 intrfc->if_dev = dev;
872 intrfc->if_netnum = netnum;
873 intrfc->if_dlink_type = dlink_type;
874 intrfc->if_dlink = dlink;
875 intrfc->if_internal = internal;
876 intrfc->if_ipx_offset = ipx_offset;
877 intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET;
878 INIT_HLIST_HEAD(&intrfc->if_sklist);
879 refcount_set(&intrfc->refcnt, 1);
880 spin_lock_init(&intrfc->if_sklist_lock);
881 }
882
883 return intrfc;
884}
885
886static int ipxitf_create_internal(struct ipx_interface_definition *idef)
887{
888 struct ipx_interface *intrfc;
889 int rc = -EEXIST;
890
891 /* Only one primary network allowed */
892 if (ipx_primary_net)
893 goto out;
894
895 /* Must have a valid network number */
896 rc = -EADDRNOTAVAIL;
897 if (!idef->ipx_network)
898 goto out;
899 intrfc = ipxitf_find_using_net(idef->ipx_network);
900 rc = -EADDRINUSE;
901 if (intrfc) {
902 ipxitf_put(intrfc);
903 goto out;
904 }
905 intrfc = ipxitf_alloc(NULL, idef->ipx_network, 0, NULL, 1, 0);
906 rc = -EAGAIN;
907 if (!intrfc)
908 goto out;
909 memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN);
910 ipx_internal_net = ipx_primary_net = intrfc;
911 ipxitf_hold(intrfc);
912 ipxitf_insert(intrfc);
913
914 rc = ipxitf_add_local_route(intrfc);
915 ipxitf_put(intrfc);
916out:
917 return rc;
918}
919
920static __be16 ipx_map_frame_type(unsigned char type)
921{
922 __be16 rc = 0;
923
924 switch (type) {
925 case IPX_FRAME_ETHERII: rc = htons(ETH_P_IPX); break;
926 case IPX_FRAME_8022: rc = htons(ETH_P_802_2); break;
927 case IPX_FRAME_SNAP: rc = htons(ETH_P_SNAP); break;
928 case IPX_FRAME_8023: rc = htons(ETH_P_802_3); break;
929 }
930
931 return rc;
932}
933
934static int ipxitf_create(struct ipx_interface_definition *idef)
935{
936 struct net_device *dev;
937 __be16 dlink_type = 0;
938 struct datalink_proto *datalink = NULL;
939 struct ipx_interface *intrfc;
940 int rc;
941
942 if (idef->ipx_special == IPX_INTERNAL) {
943 rc = ipxitf_create_internal(idef);
944 goto out;
945 }
946
947 rc = -EEXIST;
948 if (idef->ipx_special == IPX_PRIMARY && ipx_primary_net)
949 goto out;
950
951 intrfc = ipxitf_find_using_net(idef->ipx_network);
952 rc = -EADDRINUSE;
953 if (idef->ipx_network && intrfc) {
954 ipxitf_put(intrfc);
955 goto out;
956 }
957
958 if (intrfc)
959 ipxitf_put(intrfc);
960
961 dev = dev_get_by_name(&init_net, idef->ipx_device);
962 rc = -ENODEV;
963 if (!dev)
964 goto out;
965
966 switch (idef->ipx_dlink_type) {
967 case IPX_FRAME_8022:
968 dlink_type = htons(ETH_P_802_2);
969 datalink = p8022_datalink;
970 break;
971 case IPX_FRAME_ETHERII:
972 if (dev->type != ARPHRD_IEEE802) {
973 dlink_type = htons(ETH_P_IPX);
974 datalink = pEII_datalink;
975 break;
976 }
977 /* fall through */
978 case IPX_FRAME_SNAP:
979 dlink_type = htons(ETH_P_SNAP);
980 datalink = pSNAP_datalink;
981 break;
982 case IPX_FRAME_8023:
983 dlink_type = htons(ETH_P_802_3);
984 datalink = p8023_datalink;
985 break;
986 case IPX_FRAME_NONE:
987 default:
988 rc = -EPROTONOSUPPORT;
989 goto out_dev;
990 }
991
992 rc = -ENETDOWN;
993 if (!(dev->flags & IFF_UP))
994 goto out_dev;
995
996 /* Check addresses are suitable */
997 rc = -EINVAL;
998 if (dev->addr_len > IPX_NODE_LEN)
999 goto out_dev;
1000
1001 intrfc = ipxitf_find_using_phys(dev, dlink_type);
1002 if (!intrfc) {
1003 /* Ok now create */
1004 intrfc = ipxitf_alloc(dev, idef->ipx_network, dlink_type,
1005 datalink, 0, dev->hard_header_len +
1006 datalink->header_length);
1007 rc = -EAGAIN;
1008 if (!intrfc)
1009 goto out_dev;
1010 /* Setup primary if necessary */
1011 if (idef->ipx_special == IPX_PRIMARY)
1012 ipx_primary_net = intrfc;
1013 if (!memcmp(idef->ipx_node, "\000\000\000\000\000\000",
1014 IPX_NODE_LEN)) {
1015 memset(intrfc->if_node, 0, IPX_NODE_LEN);
1016 memcpy(intrfc->if_node + IPX_NODE_LEN - dev->addr_len,
1017 dev->dev_addr, dev->addr_len);
1018 } else
1019 memcpy(intrfc->if_node, idef->ipx_node, IPX_NODE_LEN);
1020 ipxitf_hold(intrfc);
1021 ipxitf_insert(intrfc);
1022 }
1023
1024
1025 /* If the network number is known, add a route */
1026 rc = 0;
1027 if (!intrfc->if_netnum)
1028 goto out_intrfc;
1029
1030 rc = ipxitf_add_local_route(intrfc);
1031out_intrfc:
1032 ipxitf_put(intrfc);
1033 goto out;
1034out_dev:
1035 dev_put(dev);
1036out:
1037 return rc;
1038}
1039
1040static int ipxitf_delete(struct ipx_interface_definition *idef)
1041{
1042 struct net_device *dev = NULL;
1043 __be16 dlink_type = 0;
1044 struct ipx_interface *intrfc;
1045 int rc = 0;
1046
1047 spin_lock_bh(&ipx_interfaces_lock);
1048 if (idef->ipx_special == IPX_INTERNAL) {
1049 if (ipx_internal_net) {
1050 __ipxitf_put(ipx_internal_net);
1051 goto out;
1052 }
1053 rc = -ENOENT;
1054 goto out;
1055 }
1056
1057 dlink_type = ipx_map_frame_type(idef->ipx_dlink_type);
1058 rc = -EPROTONOSUPPORT;
1059 if (!dlink_type)
1060 goto out;
1061
1062 dev = __dev_get_by_name(&init_net, idef->ipx_device);
1063 rc = -ENODEV;
1064 if (!dev)
1065 goto out;
1066
1067 intrfc = __ipxitf_find_using_phys(dev, dlink_type);
1068 rc = -EINVAL;
1069 if (!intrfc)
1070 goto out;
1071 __ipxitf_put(intrfc);
1072
1073 rc = 0;
1074out:
1075 spin_unlock_bh(&ipx_interfaces_lock);
1076 return rc;
1077}
1078
1079static struct ipx_interface *ipxitf_auto_create(struct net_device *dev,
1080 __be16 dlink_type)
1081{
1082 struct ipx_interface *intrfc = NULL;
1083 struct datalink_proto *datalink;
1084
1085 if (!dev)
1086 goto out;
1087
1088 /* Check addresses are suitable */
1089 if (dev->addr_len > IPX_NODE_LEN)
1090 goto out;
1091
1092 switch (ntohs(dlink_type)) {
1093 case ETH_P_IPX: datalink = pEII_datalink; break;
1094 case ETH_P_802_2: datalink = p8022_datalink; break;
1095 case ETH_P_SNAP: datalink = pSNAP_datalink; break;
1096 case ETH_P_802_3: datalink = p8023_datalink; break;
1097 default: goto out;
1098 }
1099
1100 intrfc = ipxitf_alloc(dev, 0, dlink_type, datalink, 0,
1101 dev->hard_header_len + datalink->header_length);
1102
1103 if (intrfc) {
1104 memset(intrfc->if_node, 0, IPX_NODE_LEN);
1105 memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]),
1106 dev->dev_addr, dev->addr_len);
1107 spin_lock_init(&intrfc->if_sklist_lock);
1108 refcount_set(&intrfc->refcnt, 1);
1109 ipxitf_insert(intrfc);
1110 dev_hold(dev);
1111 }
1112
1113out:
1114 return intrfc;
1115}
1116
1117static int ipxitf_ioctl(unsigned int cmd, void __user *arg)
1118{
1119 int rc = -EINVAL;
1120 struct ifreq ifr;
1121 int val;
1122
1123 switch (cmd) {
1124 case SIOCSIFADDR: {
1125 struct sockaddr_ipx *sipx;
1126 struct ipx_interface_definition f;
1127
1128 rc = -EFAULT;
1129 if (copy_from_user(&ifr, arg, sizeof(ifr)))
1130 break;
1131 sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
1132 rc = -EINVAL;
1133 if (sipx->sipx_family != AF_IPX)
1134 break;
1135 f.ipx_network = sipx->sipx_network;
1136 memcpy(f.ipx_device, ifr.ifr_name,
1137 sizeof(f.ipx_device));
1138 memcpy(f.ipx_node, sipx->sipx_node, IPX_NODE_LEN);
1139 f.ipx_dlink_type = sipx->sipx_type;
1140 f.ipx_special = sipx->sipx_special;
1141
1142 if (sipx->sipx_action == IPX_DLTITF)
1143 rc = ipxitf_delete(&f);
1144 else
1145 rc = ipxitf_create(&f);
1146 break;
1147 }
1148 case SIOCGIFADDR: {
1149 struct sockaddr_ipx *sipx;
1150 struct ipx_interface *ipxif;
1151 struct net_device *dev;
1152
1153 rc = -EFAULT;
1154 if (copy_from_user(&ifr, arg, sizeof(ifr)))
1155 break;
1156 sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
1157 dev = __dev_get_by_name(&init_net, ifr.ifr_name);
1158 rc = -ENODEV;
1159 if (!dev)
1160 break;
1161 ipxif = ipxitf_find_using_phys(dev,
1162 ipx_map_frame_type(sipx->sipx_type));
1163 rc = -EADDRNOTAVAIL;
1164 if (!ipxif)
1165 break;
1166
1167 sipx->sipx_family = AF_IPX;
1168 sipx->sipx_network = ipxif->if_netnum;
1169 memcpy(sipx->sipx_node, ipxif->if_node,
1170 sizeof(sipx->sipx_node));
1171 rc = 0;
1172 if (copy_to_user(arg, &ifr, sizeof(ifr)))
1173 rc = -EFAULT;
1174 ipxitf_put(ipxif);
1175 break;
1176 }
1177 case SIOCAIPXITFCRT:
1178 rc = -EFAULT;
1179 if (get_user(val, (unsigned char __user *) arg))
1180 break;
1181 rc = 0;
1182 ipxcfg_auto_create_interfaces = val;
1183 break;
1184 case SIOCAIPXPRISLT:
1185 rc = -EFAULT;
1186 if (get_user(val, (unsigned char __user *) arg))
1187 break;
1188 rc = 0;
1189 ipxcfg_set_auto_select(val);
1190 break;
1191 }
1192
1193 return rc;
1194}
1195
1196/*
1197 * Checksum routine for IPX
1198 */
1199
1200/* Note: We assume ipx_tctrl==0 and htons(length)==ipx_pktsize */
1202 /* This function should *not* mess with packet contents */
1202
1203__be16 ipx_cksum(struct ipxhdr *packet, int length)
1204{
1205 /*
1206 * NOTE: sum is a net byte order quantity, which optimizes the
1207 * loop. This only works on big and little endian machines. (I
1208 * don't know of a machine that isn't.)
1209 */
1210 /* handle the first 3 words separately; checksum should be skipped
1211 * and ipx_tctrl masked out */
1212 __u16 *p = (__u16 *)packet;
1213 __u32 sum = p[1] + (p[2] & (__force u16)htons(0x00ff));
1214 __u32 i = (length >> 1) - 3; /* Number of remaining complete words */
1215
1216 /* Loop through them */
1217 p += 3;
1218 while (i--)
1219 sum += *p++;
1220
1221 /* Add on the last part word if it exists */
1222 if (packet->ipx_pktsize & htons(1))
1223 sum += (__force u16)htons(0xff00) & *p;
1224
1225 /* Do final fixup */
1226 sum = (sum & 0xffff) + (sum >> 16);
1227
1228 /* It's a pity there's no concept of carry in C */
1229 if (sum >= 0x10000)
1230 sum++;
1231
1232 /*
1233 * Leave 0 alone; we don't want 0xffff here. Note that we can't get
1234 * here with 0x10000, so this check is the same as ((__u16)sum)
1235 */
1236 if (sum)
1237 sum = ~sum;
1238
1239 return (__force __be16)sum;
1240}
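/*
 * Usage note (illustrative, not part of the original file): callers can
 * compute the sum over the whole packet while the checksum field is still
 * in place -- the first word is skipped above, so its value does not
 * matter -- and store the result directly, e.g.:
 *
 *	ipx->ipx_checksum = ipx_cksum(ipx, ntohs(ipx->ipx_pktsize));
 */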
1241
1242const char *ipx_frame_name(__be16 frame)
1243{
1244 char* rc = "None";
1245
1246 switch (ntohs(frame)) {
1247 case ETH_P_IPX: rc = "EtherII"; break;
1248 case ETH_P_802_2: rc = "802.2"; break;
1249 case ETH_P_SNAP: rc = "SNAP"; break;
1250 case ETH_P_802_3: rc = "802.3"; break;
1251 }
1252
1253 return rc;
1254}
1255
1256const char *ipx_device_name(struct ipx_interface *intrfc)
1257{
1258 return intrfc->if_internal ? "Internal" :
1259 intrfc->if_dev ? intrfc->if_dev->name : "Unknown";
1260}
1261
1262/* Handling for system calls applied via the various interfaces to an IPX
1263 * socket object. */
1264
1265static int ipx_setsockopt(struct socket *sock, int level, int optname,
1266 char __user *optval, unsigned int optlen)
1267{
1268 struct sock *sk = sock->sk;
1269 int opt;
1270 int rc = -EINVAL;
1271
1272 lock_sock(sk);
1273 if (optlen != sizeof(int))
1274 goto out;
1275
1276 rc = -EFAULT;
1277 if (get_user(opt, (unsigned int __user *)optval))
1278 goto out;
1279
1280 rc = -ENOPROTOOPT;
1281 if (!(level == SOL_IPX && optname == IPX_TYPE))
1282 goto out;
1283
1284 ipx_sk(sk)->type = opt;
1285 rc = 0;
1286out:
1287 release_sock(sk);
1288 return rc;
1289}
1290
1291static int ipx_getsockopt(struct socket *sock, int level, int optname,
1292 char __user *optval, int __user *optlen)
1293{
1294 struct sock *sk = sock->sk;
1295 int val = 0;
1296 int len;
1297 int rc = -ENOPROTOOPT;
1298
1299 lock_sock(sk);
1300 if (!(level == SOL_IPX && optname == IPX_TYPE))
1301 goto out;
1302
1303 val = ipx_sk(sk)->type;
1304
1305 rc = -EFAULT;
1306 if (get_user(len, optlen))
1307 goto out;
1308
1309 len = min_t(unsigned int, len, sizeof(int));
1310 rc = -EINVAL;
1311 if(len < 0)
1312 goto out;
1313
1314 rc = -EFAULT;
1315 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
1316 goto out;
1317
1318 rc = 0;
1319out:
1320 release_sock(sk);
1321 return rc;
1322}
1323
1324static struct proto ipx_proto = {
1325 .name = "IPX",
1326 .owner = THIS_MODULE,
1327 .obj_size = sizeof(struct ipx_sock),
1328};
1329
1330static int ipx_create(struct net *net, struct socket *sock, int protocol,
1331 int kern)
1332{
1333 int rc = -ESOCKTNOSUPPORT;
1334 struct sock *sk;
1335
1336 if (!net_eq(net, &init_net))
1337 return -EAFNOSUPPORT;
1338
1339 /*
1340 * SPX support is not anymore in the kernel sources. If you want to
1341 * ressurrect it, completing it and making it understand shared skbs,
1342 * be fully multithreaded, etc, grab the sources in an early 2.5 kernel
1343 * tree.
1344 */
1345 if (sock->type != SOCK_DGRAM)
1346 goto out;
1347
1348 rc = -ENOMEM;
1349 sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern);
1350 if (!sk)
1351 goto out;
1352
1353 sk_refcnt_debug_inc(sk);
1354 sock_init_data(sock, sk);
1355 sk->sk_no_check_tx = 1; /* Checksum off by default */
1356 sock->ops = &ipx_dgram_ops;
1357 rc = 0;
1358out:
1359 return rc;
1360}
1361
1362static int ipx_release(struct socket *sock)
1363{
1364 struct sock *sk = sock->sk;
1365
1366 if (!sk)
1367 goto out;
1368
1369 lock_sock(sk);
1370 sk->sk_shutdown = SHUTDOWN_MASK;
1371 if (!sock_flag(sk, SOCK_DEAD))
1372 sk->sk_state_change(sk);
1373
1374 sock_set_flag(sk, SOCK_DEAD);
1375 sock->sk = NULL;
1376 sk_refcnt_debug_release(sk);
1377 ipx_destroy_socket(sk);
1378 release_sock(sk);
1379 sock_put(sk);
1380out:
1381 return 0;
1382}
1383
1384/* caller must hold a reference to intrfc */
1385
1386static __be16 ipx_first_free_socketnum(struct ipx_interface *intrfc)
1387{
1388 unsigned short socketNum = intrfc->if_sknum;
1389
1390 spin_lock_bh(&intrfc->if_sklist_lock);
1391
1392 if (socketNum < IPX_MIN_EPHEMERAL_SOCKET)
1393 socketNum = IPX_MIN_EPHEMERAL_SOCKET;
1394
1395 while (__ipxitf_find_socket(intrfc, htons(socketNum)))
1396 if (socketNum > IPX_MAX_EPHEMERAL_SOCKET)
1397 socketNum = IPX_MIN_EPHEMERAL_SOCKET;
1398 else
1399 socketNum++;
1400
1401 spin_unlock_bh(&intrfc->if_sklist_lock);
1402 intrfc->if_sknum = socketNum;
1403
1404 return htons(socketNum);
1405}
1406
1407static int __ipx_bind(struct socket *sock,
1408 struct sockaddr *uaddr, int addr_len)
1409{
1410 struct sock *sk = sock->sk;
1411 struct ipx_sock *ipxs = ipx_sk(sk);
1412 struct ipx_interface *intrfc;
1413 struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr;
1414 int rc = -EINVAL;
1415
1416 if (!sock_flag(sk, SOCK_ZAPPED) || addr_len != sizeof(struct sockaddr_ipx))
1417 goto out;
1418
1419 intrfc = ipxitf_find_using_net(addr->sipx_network);
1420 rc = -EADDRNOTAVAIL;
1421 if (!intrfc)
1422 goto out;
1423
1424 if (!addr->sipx_port) {
1425 addr->sipx_port = ipx_first_free_socketnum(intrfc);
1426 rc = -EINVAL;
1427 if (!addr->sipx_port)
1428 goto out_put;
1429 }
1430
1431 /* protect IPX system stuff like routing/sap */
1432 rc = -EACCES;
1433 if (ntohs(addr->sipx_port) < IPX_MIN_EPHEMERAL_SOCKET &&
1434 !capable(CAP_NET_ADMIN))
1435 goto out_put;
1436
1437 ipxs->port = addr->sipx_port;
1438
1439#ifdef CONFIG_IPX_INTERN
1440 if (intrfc == ipx_internal_net) {
1441 /* The source address is to be set explicitly if the
1442 * socket is to be bound on the internal network. If a
1443 * node number 0 was specified, the default is used.
1444 */
1445
1446 rc = -EINVAL;
1447 if (!memcmp(addr->sipx_node, ipx_broadcast_node, IPX_NODE_LEN))
1448 goto out_put;
1449 if (!memcmp(addr->sipx_node, ipx_this_node, IPX_NODE_LEN))
1450 memcpy(ipxs->node, intrfc->if_node, IPX_NODE_LEN);
1451 else
1452 memcpy(ipxs->node, addr->sipx_node, IPX_NODE_LEN);
1453
1454 rc = -EADDRINUSE;
1455 if (ipxitf_find_internal_socket(intrfc, ipxs->node,
1456 ipxs->port)) {
1457 SOCK_DEBUG(sk,
1458 "IPX: bind failed because port %X in use.\n",
1459 ntohs(addr->sipx_port));
1460 goto out_put;
1461 }
1462 } else {
1463		/* Source addresses are easy: the address must be our
1464		 * network:node pair for an interface configured for IPX
1465		 * with the ipx routing ioctl().
1466		 */
1467
1468 memcpy(ipxs->node, intrfc->if_node, IPX_NODE_LEN);
1469
1470 rc = -EADDRINUSE;
1471 if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
1472 SOCK_DEBUG(sk,
1473 "IPX: bind failed because port %X in use.\n",
1474 ntohs(addr->sipx_port));
1475 goto out_put;
1476 }
1477 }
1478
1479#else /* !def CONFIG_IPX_INTERN */
1480
1481	/* Source addresses are easy: the address must be our network:node
1482	   pair for an interface configured for IPX with the ipx routing ioctl(). */
1483
1484 rc = -EADDRINUSE;
1485 if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
1486 SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n",
1487 ntohs((int)addr->sipx_port));
1488 goto out_put;
1489 }
1490
1491#endif /* CONFIG_IPX_INTERN */
1492
1493 ipxitf_insert_socket(intrfc, sk);
1494 sock_reset_flag(sk, SOCK_ZAPPED);
1495
1496 rc = 0;
1497out_put:
1498 ipxitf_put(intrfc);
1499out:
1500 return rc;
1501}
1502
1503static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1504{
1505 struct sock *sk = sock->sk;
1506 int rc;
1507
1508 lock_sock(sk);
1509 rc = __ipx_bind(sock, uaddr, addr_len);
1510 release_sock(sk);
1511
1512 return rc;
1513}
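
/* [Editorial example -- not part of the original file.] Binding the way
 * __ipx_bind() expects: a full struct sockaddr_ipx, where sipx_port == 0
 * requests an ephemeral socket number from ipx_first_free_socketnum().
 * Ports below IPX_MIN_EPHEMERAL_SOCKET are reserved and need
 * CAP_NET_ADMIN. The network number below is invented for illustration.
 */
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/ipx.h>

static int bind_ipx_ephemeral(int fd)
{
	struct sockaddr_ipx addr;

	memset(&addr, 0, sizeof(addr));
	addr.sipx_family = AF_IPX;
	addr.sipx_network = htonl(0x00000001);	/* hypothetical net number */
	addr.sipx_port = 0;			/* 0 = allocate ephemeral */

	return bind(fd, (struct sockaddr *)&addr, sizeof(addr));
}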
1514
1515static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
1516 int addr_len, int flags)
1517{
1518 struct sock *sk = sock->sk;
1519 struct ipx_sock *ipxs = ipx_sk(sk);
1520 struct sockaddr_ipx *addr;
1521 int rc = -EINVAL;
1522 struct ipx_route *rt;
1523
1524 sk->sk_state = TCP_CLOSE;
1525 sock->state = SS_UNCONNECTED;
1526
1527 lock_sock(sk);
1528 if (addr_len != sizeof(*addr))
1529 goto out;
1530 addr = (struct sockaddr_ipx *)uaddr;
1531
1532 /* put the autobinding in */
1533 if (!ipxs->port) {
1534 struct sockaddr_ipx uaddr;
1535
1536 uaddr.sipx_port = 0;
1537 uaddr.sipx_network = 0;
1538
1539#ifdef CONFIG_IPX_INTERN
1540 rc = -ENETDOWN;
1541 if (!ipxs->intrfc)
1542 goto out; /* Someone zonked the iface */
1543 memcpy(uaddr.sipx_node, ipxs->intrfc->if_node,
1544 IPX_NODE_LEN);
1545#endif /* CONFIG_IPX_INTERN */
1546
1547 rc = __ipx_bind(sock, (struct sockaddr *)&uaddr,
1548 sizeof(struct sockaddr_ipx));
1549 if (rc)
1550 goto out;
1551 }
1552
1553	/* We can either connect to the primary network or to somewhere
1554	 * we can route to */
1555 rt = ipxrtr_lookup(addr->sipx_network);
1556 rc = -ENETUNREACH;
1557 if (!rt && !(!addr->sipx_network && ipx_primary_net))
1558 goto out;
1559
1560 ipxs->dest_addr.net = addr->sipx_network;
1561 ipxs->dest_addr.sock = addr->sipx_port;
1562 memcpy(ipxs->dest_addr.node, addr->sipx_node, IPX_NODE_LEN);
1563 ipxs->type = addr->sipx_type;
1564
1565 if (sock->type == SOCK_DGRAM) {
1566 sock->state = SS_CONNECTED;
1567 sk->sk_state = TCP_ESTABLISHED;
1568 }
1569
1570 if (rt)
1571 ipxrtr_put(rt);
1572 rc = 0;
1573out:
1574 release_sock(sk);
1575 return rc;
1576}
1577
1578
1579static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
1580 int *uaddr_len, int peer)
1581{
1582 struct ipx_address *addr;
1583 struct sockaddr_ipx sipx;
1584 struct sock *sk = sock->sk;
1585 struct ipx_sock *ipxs = ipx_sk(sk);
1586 int rc;
1587
1588 *uaddr_len = sizeof(struct sockaddr_ipx);
1589
1590 lock_sock(sk);
1591 if (peer) {
1592 rc = -ENOTCONN;
1593 if (sk->sk_state != TCP_ESTABLISHED)
1594 goto out;
1595
1596 addr = &ipxs->dest_addr;
1597 sipx.sipx_network = addr->net;
1598 sipx.sipx_port = addr->sock;
1599 memcpy(sipx.sipx_node, addr->node, IPX_NODE_LEN);
1600 } else {
1601 if (ipxs->intrfc) {
1602 sipx.sipx_network = ipxs->intrfc->if_netnum;
1603#ifdef CONFIG_IPX_INTERN
1604 memcpy(sipx.sipx_node, ipxs->node, IPX_NODE_LEN);
1605#else
1606 memcpy(sipx.sipx_node, ipxs->intrfc->if_node,
1607 IPX_NODE_LEN);
1608#endif /* CONFIG_IPX_INTERN */
1609
1610 } else {
1611 sipx.sipx_network = 0;
1612 memset(sipx.sipx_node, '\0', IPX_NODE_LEN);
1613 }
1614
1615 sipx.sipx_port = ipxs->port;
1616 }
1617
1618 sipx.sipx_family = AF_IPX;
1619 sipx.sipx_type = ipxs->type;
1620 sipx.sipx_zero = 0;
1621 memcpy(uaddr, &sipx, sizeof(sipx));
1622
1623 rc = 0;
1624out:
1625 release_sock(sk);
1626 return rc;
1627}
1628
1629static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
1630{
1631 /* NULL here for pt means the packet was looped back */
1632 struct ipx_interface *intrfc;
1633 struct ipxhdr *ipx;
1634 u16 ipx_pktsize;
1635 int rc = 0;
1636
1637 if (!net_eq(dev_net(dev), &init_net))
1638 goto drop;
1639
1640 /* Not ours */
1641 if (skb->pkt_type == PACKET_OTHERHOST)
1642 goto drop;
1643
1644 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
1645 goto out;
1646
1647 if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
1648 goto drop;
1649
1650 ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize);
1651
1652 /* Too small or invalid header? */
1653 if (ipx_pktsize < sizeof(struct ipxhdr) ||
1654 !pskb_may_pull(skb, ipx_pktsize))
1655 goto drop;
1656
1657 ipx = ipx_hdr(skb);
1658 if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
1659 ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize))
1660 goto drop;
1661
1662 IPX_SKB_CB(skb)->ipx_tctrl = ipx->ipx_tctrl;
1663 IPX_SKB_CB(skb)->ipx_dest_net = ipx->ipx_dest.net;
1664 IPX_SKB_CB(skb)->ipx_source_net = ipx->ipx_source.net;
1665
1666 /* Determine what local ipx endpoint this is */
1667 intrfc = ipxitf_find_using_phys(dev, pt->type);
1668 if (!intrfc) {
1669 if (ipxcfg_auto_create_interfaces &&
1670 IPX_SKB_CB(skb)->ipx_dest_net) {
1671 intrfc = ipxitf_auto_create(dev, pt->type);
1672 if (intrfc)
1673 ipxitf_hold(intrfc);
1674 }
1675
1676 if (!intrfc) /* Not one of ours */
1677 /* or invalid packet for auto creation */
1678 goto drop;
1679 }
1680
1681 rc = ipxitf_rcv(intrfc, skb);
1682 ipxitf_put(intrfc);
1683 goto out;
1684drop:
1685 kfree_skb(skb);
1686out:
1687 return rc;
1688}
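
/* [Editorial sketch -- not part of the original file.] The two length
 * checks ipx_rcv() just performed, restated over a raw buffer: the
 * 16-bit ipx_pktsize field (at offset 2, right after the checksum) must
 * at least cover the 30-byte IPX header and must not claim more bytes
 * than were actually received.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static bool ipx_pktsize_ok(const unsigned char *buf, size_t buflen)
{
	const size_t hdrlen = 30;	/* sizeof(struct ipxhdr) */
	uint16_t pktsize;

	if (buflen < hdrlen)		/* cannot even read the header */
		return false;
	memcpy(&pktsize, buf + 2, sizeof(pktsize));
	pktsize = ntohs(pktsize);
	return pktsize >= hdrlen && pktsize <= buflen;
}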
1689
1690static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1691{
1692 struct sock *sk = sock->sk;
1693 struct ipx_sock *ipxs = ipx_sk(sk);
1694 DECLARE_SOCKADDR(struct sockaddr_ipx *, usipx, msg->msg_name);
1695 struct sockaddr_ipx local_sipx;
1696 int rc = -EINVAL;
1697 int flags = msg->msg_flags;
1698
1699 lock_sock(sk);
1700 /* Socket gets bound below anyway */
1701/* if (sk->sk_zapped)
1702 return -EIO; */ /* Socket not bound */
1703 if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
1704 goto out;
1705
1706 /* Max possible packet size limited by 16 bit pktsize in header */
1707 if (len >= 65535 - sizeof(struct ipxhdr))
1708 goto out;
1709
1710 if (usipx) {
1711 if (!ipxs->port) {
1712 struct sockaddr_ipx uaddr;
1713
1714 uaddr.sipx_port = 0;
1715 uaddr.sipx_network = 0;
1716#ifdef CONFIG_IPX_INTERN
1717 rc = -ENETDOWN;
1718 if (!ipxs->intrfc)
1719 goto out; /* Someone zonked the iface */
1720 memcpy(uaddr.sipx_node, ipxs->intrfc->if_node,
1721 IPX_NODE_LEN);
1722#endif
1723 rc = __ipx_bind(sock, (struct sockaddr *)&uaddr,
1724 sizeof(struct sockaddr_ipx));
1725 if (rc)
1726 goto out;
1727 }
1728
1729 rc = -EINVAL;
1730 if (msg->msg_namelen < sizeof(*usipx) ||
1731 usipx->sipx_family != AF_IPX)
1732 goto out;
1733 } else {
1734 rc = -ENOTCONN;
1735 if (sk->sk_state != TCP_ESTABLISHED)
1736 goto out;
1737
1738 usipx = &local_sipx;
1739 usipx->sipx_family = AF_IPX;
1740 usipx->sipx_type = ipxs->type;
1741 usipx->sipx_port = ipxs->dest_addr.sock;
1742 usipx->sipx_network = ipxs->dest_addr.net;
1743 memcpy(usipx->sipx_node, ipxs->dest_addr.node, IPX_NODE_LEN);
1744 }
1745
1746 rc = ipxrtr_route_packet(sk, usipx, msg, len, flags & MSG_DONTWAIT);
1747 if (rc >= 0)
1748 rc = len;
1749out:
1750 release_sock(sk);
1751 return rc;
1752}
1753
1754
1755static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
1756 int flags)
1757{
1758 struct sock *sk = sock->sk;
1759 struct ipx_sock *ipxs = ipx_sk(sk);
1760 DECLARE_SOCKADDR(struct sockaddr_ipx *, sipx, msg->msg_name);
1761 struct ipxhdr *ipx = NULL;
1762 struct sk_buff *skb;
1763 int copied, rc;
1764 bool locked = true;
1765
1766 lock_sock(sk);
1767 /* put the autobinding in */
1768 if (!ipxs->port) {
1769 struct sockaddr_ipx uaddr;
1770
1771 uaddr.sipx_port = 0;
1772 uaddr.sipx_network = 0;
1773
1774#ifdef CONFIG_IPX_INTERN
1775 rc = -ENETDOWN;
1776 if (!ipxs->intrfc)
1777 goto out; /* Someone zonked the iface */
1778 memcpy(uaddr.sipx_node, ipxs->intrfc->if_node, IPX_NODE_LEN);
1779#endif /* CONFIG_IPX_INTERN */
1780
1781 rc = __ipx_bind(sock, (struct sockaddr *)&uaddr,
1782 sizeof(struct sockaddr_ipx));
1783 if (rc)
1784 goto out;
1785 }
1786
1787 rc = -ENOTCONN;
1788 if (sock_flag(sk, SOCK_ZAPPED))
1789 goto out;
1790
1791 release_sock(sk);
1792 locked = false;
1793 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1794 flags & MSG_DONTWAIT, &rc);
1795 if (!skb) {
1796 if (rc == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN))
1797 rc = 0;
1798 goto out;
1799 }
1800
1801 ipx = ipx_hdr(skb);
1802 copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
1803 if (copied > size) {
1804 copied = size;
1805 msg->msg_flags |= MSG_TRUNC;
1806 }
1807
1808 rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
1809 if (rc)
1810 goto out_free;
1811 if (skb->tstamp)
1812 sk->sk_stamp = skb->tstamp;
1813
1814 if (sipx) {
1815 sipx->sipx_family = AF_IPX;
1816 sipx->sipx_port = ipx->ipx_source.sock;
1817 memcpy(sipx->sipx_node, ipx->ipx_source.node, IPX_NODE_LEN);
1818 sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net;
1819 sipx->sipx_type = ipx->ipx_type;
1820 sipx->sipx_zero = 0;
1821 msg->msg_namelen = sizeof(*sipx);
1822 }
1823 rc = copied;
1824
1825out_free:
1826 skb_free_datagram(sk, skb);
1827out:
1828 if (locked)
1829 release_sock(sk);
1830 return rc;
1831}
1832
1833
1834static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1835{
1836 int rc = 0;
1837 long amount = 0;
1838 struct sock *sk = sock->sk;
1839 void __user *argp = (void __user *)arg;
1840
1841 lock_sock(sk);
1842 switch (cmd) {
1843 case TIOCOUTQ:
1844 amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
1845 if (amount < 0)
1846 amount = 0;
1847 rc = put_user(amount, (int __user *)argp);
1848 break;
1849 case TIOCINQ: {
1850 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1851 /* These two are safe on a single CPU system as only
1852 * user tasks fiddle here */
1853 if (skb)
1854 amount = skb->len - sizeof(struct ipxhdr);
1855 rc = put_user(amount, (int __user *)argp);
1856 break;
1857 }
1858 case SIOCADDRT:
1859 case SIOCDELRT:
1860 rc = -EPERM;
1861 if (capable(CAP_NET_ADMIN))
1862 rc = ipxrtr_ioctl(cmd, argp);
1863 break;
1864 case SIOCSIFADDR:
1865 case SIOCAIPXITFCRT:
1866 case SIOCAIPXPRISLT:
1867 rc = -EPERM;
1868 if (!capable(CAP_NET_ADMIN))
1869 break;
1870 /* fall through */
1871 case SIOCGIFADDR:
1872 rc = ipxitf_ioctl(cmd, argp);
1873 break;
1874 case SIOCIPXCFGDATA:
1875 rc = ipxcfg_get_config_data(argp);
1876 break;
1877 case SIOCIPXNCPCONN:
1878 /*
1879 * This socket wants to take care of the NCP connection
1880 * handed to us in arg.
1881 */
1882 rc = -EPERM;
1883 if (!capable(CAP_NET_ADMIN))
1884 break;
1885 rc = get_user(ipx_sk(sk)->ipx_ncp_conn,
1886 (const unsigned short __user *)argp);
1887 break;
1888 case SIOCGSTAMP:
1889 rc = sock_get_timestamp(sk, argp);
1890 break;
1891 case SIOCGIFDSTADDR:
1892 case SIOCSIFDSTADDR:
1893 case SIOCGIFBRDADDR:
1894 case SIOCSIFBRDADDR:
1895 case SIOCGIFNETMASK:
1896 case SIOCSIFNETMASK:
1897 rc = -EINVAL;
1898 break;
1899 default:
1900 rc = -ENOIOCTLCMD;
1901 break;
1902 }
1903 release_sock(sk);
1904
1905 return rc;
1906}
1907
1908
1909#ifdef CONFIG_COMPAT
1910static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1911{
1912	/*
1913	 * These four commands use the same structure on 32-bit and 64-bit. The
1914	 * rest of the IPX commands are handled by the generic ioctl code. As
1915	 * these commands are SIOCPROTOPRIVATE..SIOCPROTOPRIVATE+3, they cannot
1916	 * be handled by that generic code.
1917	 */
1918 switch (cmd) {
1919 case SIOCAIPXITFCRT:
1920 case SIOCAIPXPRISLT:
1921 case SIOCIPXCFGDATA:
1922 case SIOCIPXNCPCONN:
1923 return ipx_ioctl(sock, cmd, arg);
1924 default:
1925 return -ENOIOCTLCMD;
1926 }
1927}
1928#endif
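
/* [Editorial note.] The four private commands special-cased above are
 * allocated from the SIOCPROTOPRIVATE range in the IPX uapi header,
 * which is exactly why the generic compat ioctl translation cannot
 * handle them:
 *
 *	#define SIOCAIPXITFCRT	(SIOCPROTOPRIVATE)	// create interface
 *	#define SIOCAIPXPRISLT	(SIOCPROTOPRIVATE + 1)	// set primary net
 *	#define SIOCIPXCFGDATA	(SIOCPROTOPRIVATE + 2)	// get config data
 *	#define SIOCIPXNCPCONN	(SIOCPROTOPRIVATE + 3)	// handle NCP conn
 */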
1929
1930static int ipx_shutdown(struct socket *sock, int mode)
1931{
1932 struct sock *sk = sock->sk;
1933
1934 if (mode < SHUT_RD || mode > SHUT_RDWR)
1935 return -EINVAL;
1936 /* This maps:
1937 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
1938 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
1939 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
1940 */
1941 ++mode;
1942
1943 lock_sock(sk);
1944 sk->sk_shutdown |= mode;
1945 release_sock(sk);
1946 sk->sk_state_change(sk);
1947
1948 return 0;
1949}
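
/* [Editorial example -- not part of the original file.] How the ++mode
 * mapping above lines up with the userspace constants: shutdown(fd,
 * SHUT_WR) arrives here with mode == 1, which becomes SEND_SHUTDOWN (2)
 * in sk->sk_shutdown, leaving the receive side open.
 */
#include <sys/socket.h>

static void ipx_shut_write_side(int fd)
{
	/* SHUT_WR (1) + 1 == SEND_SHUTDOWN (2) inside ipx_shutdown() */
	shutdown(fd, SHUT_WR);
}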
1950
1951/*
1952 * Socket family declarations
1953 */
1954
1955static const struct net_proto_family ipx_family_ops = {
1956 .family = PF_IPX,
1957 .create = ipx_create,
1958 .owner = THIS_MODULE,
1959};
1960
1961static const struct proto_ops ipx_dgram_ops = {
1962 .family = PF_IPX,
1963 .owner = THIS_MODULE,
1964 .release = ipx_release,
1965 .bind = ipx_bind,
1966 .connect = ipx_connect,
1967 .socketpair = sock_no_socketpair,
1968 .accept = sock_no_accept,
1969 .getname = ipx_getname,
1970 .poll = datagram_poll,
1971 .ioctl = ipx_ioctl,
1972#ifdef CONFIG_COMPAT
1973 .compat_ioctl = ipx_compat_ioctl,
1974#endif
1975 .listen = sock_no_listen,
1976 .shutdown = ipx_shutdown,
1977 .setsockopt = ipx_setsockopt,
1978 .getsockopt = ipx_getsockopt,
1979 .sendmsg = ipx_sendmsg,
1980 .recvmsg = ipx_recvmsg,
1981 .mmap = sock_no_mmap,
1982 .sendpage = sock_no_sendpage,
1983};
1984
1985static struct packet_type ipx_8023_packet_type __read_mostly = {
1986 .type = cpu_to_be16(ETH_P_802_3),
1987 .func = ipx_rcv,
1988};
1989
1990static struct packet_type ipx_dix_packet_type __read_mostly = {
1991 .type = cpu_to_be16(ETH_P_IPX),
1992 .func = ipx_rcv,
1993};
1994
1995static struct notifier_block ipx_dev_notifier = {
1996 .notifier_call = ipxitf_device_event,
1997};
1998
1999static const unsigned char ipx_8022_type = 0xE0;
2000static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
2001static const char ipx_EII_err_msg[] __initconst =
2002 KERN_CRIT "IPX: Unable to register with Ethernet II\n";
2003static const char ipx_8023_err_msg[] __initconst =
2004 KERN_CRIT "IPX: Unable to register with 802.3\n";
2005static const char ipx_llc_err_msg[] __initconst =
2006 KERN_CRIT "IPX: Unable to register with 802.2\n";
2007static const char ipx_snap_err_msg[] __initconst =
2008 KERN_CRIT "IPX: Unable to register with SNAP\n";
2009
2010static int __init ipx_init(void)
2011{
2012 int rc = proto_register(&ipx_proto, 1);
2013
2014 if (rc != 0)
2015 goto out;
2016
2017 sock_register(&ipx_family_ops);
2018
2019 pEII_datalink = make_EII_client();
2020 if (pEII_datalink)
2021 dev_add_pack(&ipx_dix_packet_type);
2022 else
2023 printk(ipx_EII_err_msg);
2024
2025 p8023_datalink = make_8023_client();
2026 if (p8023_datalink)
2027 dev_add_pack(&ipx_8023_packet_type);
2028 else
2029 printk(ipx_8023_err_msg);
2030
2031 p8022_datalink = register_8022_client(ipx_8022_type, ipx_rcv);
2032 if (!p8022_datalink)
2033 printk(ipx_llc_err_msg);
2034
2035 pSNAP_datalink = register_snap_client(ipx_snap_id, ipx_rcv);
2036 if (!pSNAP_datalink)
2037 printk(ipx_snap_err_msg);
2038
2039 register_netdevice_notifier(&ipx_dev_notifier);
2040 ipx_register_sysctl();
2041 ipx_proc_init();
2042out:
2043 return rc;
2044}
2045
2046static void __exit ipx_proto_finito(void)
2047{
2048 ipx_proc_exit();
2049 ipx_unregister_sysctl();
2050
2051 unregister_netdevice_notifier(&ipx_dev_notifier);
2052
2053 ipxitf_cleanup();
2054
2055 if (pSNAP_datalink) {
2056 unregister_snap_client(pSNAP_datalink);
2057 pSNAP_datalink = NULL;
2058 }
2059
2060 if (p8022_datalink) {
2061 unregister_8022_client(p8022_datalink);
2062 p8022_datalink = NULL;
2063 }
2064
2065 dev_remove_pack(&ipx_8023_packet_type);
2066 if (p8023_datalink) {
2067 destroy_8023_client(p8023_datalink);
2068 p8023_datalink = NULL;
2069 }
2070
2071 dev_remove_pack(&ipx_dix_packet_type);
2072 if (pEII_datalink) {
2073 destroy_EII_client(pEII_datalink);
2074 pEII_datalink = NULL;
2075 }
2076
2077 proto_unregister(&ipx_proto);
2078 sock_unregister(ipx_family_ops.family);
2079}
2080
2081module_init(ipx_init);
2082module_exit(ipx_proto_finito);
2083MODULE_LICENSE("GPL");
2084MODULE_ALIAS_NETPROTO(PF_IPX);
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
deleted file mode 100644
index 38a3d51d9ead..000000000000
--- a/net/ipx/ipx_proc.c
+++ /dev/null
@@ -1,341 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * IPX proc routines
4 *
5 * Copyright(C) Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2002
6 */
7
8#include <linux/init.h>
9#ifdef CONFIG_PROC_FS
10#include <linux/proc_fs.h>
11#include <linux/spinlock.h>
12#include <linux/seq_file.h>
13#include <linux/export.h>
14#include <net/net_namespace.h>
15#include <net/tcp_states.h>
16#include <net/ipx.h>
17
18static void *ipx_seq_interface_start(struct seq_file *seq, loff_t *pos)
19{
20 spin_lock_bh(&ipx_interfaces_lock);
21 return seq_list_start_head(&ipx_interfaces, *pos);
22}
23
24static void *ipx_seq_interface_next(struct seq_file *seq, void *v, loff_t *pos)
25{
26 return seq_list_next(v, &ipx_interfaces, pos);
27}
28
29static void ipx_seq_interface_stop(struct seq_file *seq, void *v)
30{
31 spin_unlock_bh(&ipx_interfaces_lock);
32}
33
34static int ipx_seq_interface_show(struct seq_file *seq, void *v)
35{
36 struct ipx_interface *i;
37
38 if (v == &ipx_interfaces) {
39 seq_puts(seq, "Network Node_Address Primary Device "
40 "Frame_Type");
41#ifdef IPX_REFCNT_DEBUG
42 seq_puts(seq, " refcnt");
43#endif
44 seq_puts(seq, "\n");
45 goto out;
46 }
47
48 i = list_entry(v, struct ipx_interface, node);
49 seq_printf(seq, "%08X ", ntohl(i->if_netnum));
50 seq_printf(seq, "%02X%02X%02X%02X%02X%02X ",
51 i->if_node[0], i->if_node[1], i->if_node[2],
52 i->if_node[3], i->if_node[4], i->if_node[5]);
53 seq_printf(seq, "%-9s", i == ipx_primary_net ? "Yes" : "No");
54 seq_printf(seq, "%-11s", ipx_device_name(i));
55 seq_printf(seq, "%-9s", ipx_frame_name(i->if_dlink_type));
56#ifdef IPX_REFCNT_DEBUG
57 seq_printf(seq, "%6d", refcount_read(&i->refcnt));
58#endif
59 seq_puts(seq, "\n");
60out:
61 return 0;
62}
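
/* [Editorial note.] With the format strings above, one hypothetical
 * /proc/net/ipx/interface entry would render roughly as:
 *
 *	Network    Node_Address   Primary  Device     Frame_Type
 *	00000001   02608C123456   Yes      eth0       802.2
 *
 * All values are invented for illustration; the column widths follow
 * the %-9s/%-11s/%-9s specifiers used in ipx_seq_interface_show().
 */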
63
64static void *ipx_seq_route_start(struct seq_file *seq, loff_t *pos)
65{
66 read_lock_bh(&ipx_routes_lock);
67 return seq_list_start_head(&ipx_routes, *pos);
68}
69
70static void *ipx_seq_route_next(struct seq_file *seq, void *v, loff_t *pos)
71{
72 return seq_list_next(v, &ipx_routes, pos);
73}
74
75static void ipx_seq_route_stop(struct seq_file *seq, void *v)
76{
77 read_unlock_bh(&ipx_routes_lock);
78}
79
80static int ipx_seq_route_show(struct seq_file *seq, void *v)
81{
82 struct ipx_route *rt;
83
84 if (v == &ipx_routes) {
85 seq_puts(seq, "Network Router_Net Router_Node\n");
86 goto out;
87 }
88
89 rt = list_entry(v, struct ipx_route, node);
90
91 seq_printf(seq, "%08X ", ntohl(rt->ir_net));
92 if (rt->ir_routed)
93 seq_printf(seq, "%08X %02X%02X%02X%02X%02X%02X\n",
94 ntohl(rt->ir_intrfc->if_netnum),
95 rt->ir_router_node[0], rt->ir_router_node[1],
96 rt->ir_router_node[2], rt->ir_router_node[3],
97 rt->ir_router_node[4], rt->ir_router_node[5]);
98 else
99 seq_puts(seq, "Directly Connected\n");
100out:
101 return 0;
102}
103
104static __inline__ struct sock *ipx_get_socket_idx(loff_t pos)
105{
106 struct sock *s = NULL;
107 struct ipx_interface *i;
108
109 list_for_each_entry(i, &ipx_interfaces, node) {
110 spin_lock_bh(&i->if_sklist_lock);
111 sk_for_each(s, &i->if_sklist) {
112 if (!pos)
113 break;
114 --pos;
115 }
116 spin_unlock_bh(&i->if_sklist_lock);
117 if (!pos) {
118 if (s)
119 goto found;
120 break;
121 }
122 }
123 s = NULL;
124found:
125 return s;
126}
127
128static void *ipx_seq_socket_start(struct seq_file *seq, loff_t *pos)
129{
130 loff_t l = *pos;
131
132 spin_lock_bh(&ipx_interfaces_lock);
133 return l ? ipx_get_socket_idx(--l) : SEQ_START_TOKEN;
134}
135
136static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos)
137{
138	struct sock *sk, *next;
139 struct ipx_interface *i;
140 struct ipx_sock *ipxs;
141
142 ++*pos;
143 if (v == SEQ_START_TOKEN) {
144 sk = NULL;
145 i = ipx_interfaces_head();
146 if (!i)
147 goto out;
148 sk = sk_head(&i->if_sklist);
149 if (sk)
150 spin_lock_bh(&i->if_sklist_lock);
151 goto out;
152 }
153 sk = v;
154 next = sk_next(sk);
155 if (next) {
156 sk = next;
157 goto out;
158 }
159 ipxs = ipx_sk(sk);
160 i = ipxs->intrfc;
161 spin_unlock_bh(&i->if_sklist_lock);
162 sk = NULL;
163 for (;;) {
164 if (i->node.next == &ipx_interfaces)
165 break;
166 i = list_entry(i->node.next, struct ipx_interface, node);
167 spin_lock_bh(&i->if_sklist_lock);
168 if (!hlist_empty(&i->if_sklist)) {
169 sk = sk_head(&i->if_sklist);
170 break;
171 }
172 spin_unlock_bh(&i->if_sklist_lock);
173 }
174out:
175 return sk;
176}
177
178static int ipx_seq_socket_show(struct seq_file *seq, void *v)
179{
180 struct sock *s;
181 struct ipx_sock *ipxs;
182
183 if (v == SEQ_START_TOKEN) {
184#ifdef CONFIG_IPX_INTERN
185 seq_puts(seq, "Local_Address "
186 "Remote_Address Tx_Queue "
187 "Rx_Queue State Uid\n");
188#else
189 seq_puts(seq, "Local_Address Remote_Address "
190 "Tx_Queue Rx_Queue State Uid\n");
191#endif
192 goto out;
193 }
194
195 s = v;
196 ipxs = ipx_sk(s);
197#ifdef CONFIG_IPX_INTERN
198 seq_printf(seq, "%08X:%02X%02X%02X%02X%02X%02X:%04X ",
199 ntohl(ipxs->intrfc->if_netnum),
200 ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3],
201 ipxs->node[4], ipxs->node[5], ntohs(ipxs->port));
202#else
203 seq_printf(seq, "%08X:%04X ", ntohl(ipxs->intrfc->if_netnum),
204 ntohs(ipxs->port));
205#endif /* CONFIG_IPX_INTERN */
206 if (s->sk_state != TCP_ESTABLISHED)
207 seq_printf(seq, "%-28s", "Not_Connected");
208 else {
209 seq_printf(seq, "%08X:%02X%02X%02X%02X%02X%02X:%04X ",
210 ntohl(ipxs->dest_addr.net),
211 ipxs->dest_addr.node[0], ipxs->dest_addr.node[1],
212 ipxs->dest_addr.node[2], ipxs->dest_addr.node[3],
213 ipxs->dest_addr.node[4], ipxs->dest_addr.node[5],
214 ntohs(ipxs->dest_addr.sock));
215 }
216
217 seq_printf(seq, "%08X %08X %02X %03u\n",
218 sk_wmem_alloc_get(s),
219 sk_rmem_alloc_get(s),
220 s->sk_state,
221 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)));
222out:
223 return 0;
224}
225
226static const struct seq_operations ipx_seq_interface_ops = {
227 .start = ipx_seq_interface_start,
228 .next = ipx_seq_interface_next,
229 .stop = ipx_seq_interface_stop,
230 .show = ipx_seq_interface_show,
231};
232
233static const struct seq_operations ipx_seq_route_ops = {
234 .start = ipx_seq_route_start,
235 .next = ipx_seq_route_next,
236 .stop = ipx_seq_route_stop,
237 .show = ipx_seq_route_show,
238};
239
240static const struct seq_operations ipx_seq_socket_ops = {
241 .start = ipx_seq_socket_start,
242 .next = ipx_seq_socket_next,
243 .stop = ipx_seq_interface_stop,
244 .show = ipx_seq_socket_show,
245};
246
247static int ipx_seq_route_open(struct inode *inode, struct file *file)
248{
249 return seq_open(file, &ipx_seq_route_ops);
250}
251
252static int ipx_seq_interface_open(struct inode *inode, struct file *file)
253{
254 return seq_open(file, &ipx_seq_interface_ops);
255}
256
257static int ipx_seq_socket_open(struct inode *inode, struct file *file)
258{
259 return seq_open(file, &ipx_seq_socket_ops);
260}
261
262static const struct file_operations ipx_seq_interface_fops = {
263 .owner = THIS_MODULE,
264 .open = ipx_seq_interface_open,
265 .read = seq_read,
266 .llseek = seq_lseek,
267 .release = seq_release,
268};
269
270static const struct file_operations ipx_seq_route_fops = {
271 .owner = THIS_MODULE,
272 .open = ipx_seq_route_open,
273 .read = seq_read,
274 .llseek = seq_lseek,
275 .release = seq_release,
276};
277
278static const struct file_operations ipx_seq_socket_fops = {
279 .owner = THIS_MODULE,
280 .open = ipx_seq_socket_open,
281 .read = seq_read,
282 .llseek = seq_lseek,
283 .release = seq_release,
284};
285
286static struct proc_dir_entry *ipx_proc_dir;
287
288int __init ipx_proc_init(void)
289{
290 struct proc_dir_entry *p;
291 int rc = -ENOMEM;
292
293 ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net);
294
295 if (!ipx_proc_dir)
296 goto out;
297 p = proc_create("interface", S_IRUGO,
298 ipx_proc_dir, &ipx_seq_interface_fops);
299 if (!p)
300 goto out_interface;
301
302 p = proc_create("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_fops);
303 if (!p)
304 goto out_route;
305
306 p = proc_create("socket", S_IRUGO, ipx_proc_dir, &ipx_seq_socket_fops);
307 if (!p)
308 goto out_socket;
309
310 rc = 0;
311out:
312 return rc;
313out_socket:
314 remove_proc_entry("route", ipx_proc_dir);
315out_route:
316 remove_proc_entry("interface", ipx_proc_dir);
317out_interface:
318 remove_proc_entry("ipx", init_net.proc_net);
319 goto out;
320}
321
322void __exit ipx_proc_exit(void)
323{
324 remove_proc_entry("interface", ipx_proc_dir);
325 remove_proc_entry("route", ipx_proc_dir);
326 remove_proc_entry("socket", ipx_proc_dir);
327 remove_proc_entry("ipx", init_net.proc_net);
328}
329
330#else /* CONFIG_PROC_FS */
331
332int __init ipx_proc_init(void)
333{
334 return 0;
335}
336
337void __exit ipx_proc_exit(void)
338{
339}
340
341#endif /* CONFIG_PROC_FS */
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
deleted file mode 100644
index 3cf93aa9f284..000000000000
--- a/net/ipx/ipx_route.c
+++ /dev/null
@@ -1,293 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Implements the IPX routing routines.
4 * Code moved from af_ipx.c.
5 *
6 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2003
7 *
8 * See net/ipx/ChangeLog.
9 */
10
11#include <linux/list.h>
12#include <linux/route.h>
13#include <linux/slab.h>
14#include <linux/spinlock.h>
15
16#include <net/ipx.h>
17#include <net/sock.h>
18
19LIST_HEAD(ipx_routes);
20DEFINE_RWLOCK(ipx_routes_lock);
21
22extern struct ipx_interface *ipx_internal_net;
23
24extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
25extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
26 struct sk_buff *skb, int copy);
29
30struct ipx_route *ipxrtr_lookup(__be32 net)
31{
32 struct ipx_route *r;
33
34 read_lock_bh(&ipx_routes_lock);
35 list_for_each_entry(r, &ipx_routes, node)
36 if (r->ir_net == net) {
37 ipxrtr_hold(r);
38 goto unlock;
39 }
40 r = NULL;
41unlock:
42 read_unlock_bh(&ipx_routes_lock);
43 return r;
44}
45
46/*
47 * Caller must hold a reference to intrfc
48 */
49int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
50 unsigned char *node)
51{
52 struct ipx_route *rt;
53 int rc;
54
55 /* Get a route structure; either existing or create */
56 rt = ipxrtr_lookup(network);
57 if (!rt) {
58 rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
59 rc = -EAGAIN;
60 if (!rt)
61 goto out;
62
63 refcount_set(&rt->refcnt, 1);
64 ipxrtr_hold(rt);
65 write_lock_bh(&ipx_routes_lock);
66 list_add(&rt->node, &ipx_routes);
67 write_unlock_bh(&ipx_routes_lock);
68 } else {
69 rc = -EEXIST;
70 if (intrfc == ipx_internal_net)
71 goto out_put;
72 }
73
74 rt->ir_net = network;
75 rt->ir_intrfc = intrfc;
76 if (!node) {
77 memset(rt->ir_router_node, '\0', IPX_NODE_LEN);
78 rt->ir_routed = 0;
79 } else {
80 memcpy(rt->ir_router_node, node, IPX_NODE_LEN);
81 rt->ir_routed = 1;
82 }
83
84 rc = 0;
85out_put:
86 ipxrtr_put(rt);
87out:
88 return rc;
89}
90
91void ipxrtr_del_routes(struct ipx_interface *intrfc)
92{
93 struct ipx_route *r, *tmp;
94
95 write_lock_bh(&ipx_routes_lock);
96 list_for_each_entry_safe(r, tmp, &ipx_routes, node)
97 if (r->ir_intrfc == intrfc) {
98 list_del(&r->node);
99 ipxrtr_put(r);
100 }
101 write_unlock_bh(&ipx_routes_lock);
102}
103
104static int ipxrtr_create(struct ipx_route_definition *rd)
105{
106 struct ipx_interface *intrfc;
107 int rc = -ENETUNREACH;
108
109 /* Find the appropriate interface */
110 intrfc = ipxitf_find_using_net(rd->ipx_router_network);
111 if (!intrfc)
112 goto out;
113 rc = ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node);
114 ipxitf_put(intrfc);
115out:
116 return rc;
117}
118
119static int ipxrtr_delete(__be32 net)
120{
121 struct ipx_route *r, *tmp;
122 int rc;
123
124 write_lock_bh(&ipx_routes_lock);
125 list_for_each_entry_safe(r, tmp, &ipx_routes, node)
126 if (r->ir_net == net) {
127 /* Directly connected; can't lose route */
128 rc = -EPERM;
129 if (!r->ir_routed)
130 goto out;
131 list_del(&r->node);
132 ipxrtr_put(r);
133 rc = 0;
134 goto out;
135 }
136 rc = -ENOENT;
137out:
138 write_unlock_bh(&ipx_routes_lock);
139 return rc;
140}
141
142/*
143 * The skb has to be unshared, we'll end up calling ipxitf_send, that'll
144 * modify the packet
145 */
146int ipxrtr_route_skb(struct sk_buff *skb)
147{
148 struct ipxhdr *ipx = ipx_hdr(skb);
149 struct ipx_route *r = ipxrtr_lookup(IPX_SKB_CB(skb)->ipx_dest_net);
150
151 if (!r) { /* no known route */
152 kfree_skb(skb);
153 return 0;
154 }
155
156 ipxitf_hold(r->ir_intrfc);
157 ipxitf_send(r->ir_intrfc, skb, r->ir_routed ?
158 r->ir_router_node : ipx->ipx_dest.node);
159 ipxitf_put(r->ir_intrfc);
160 ipxrtr_put(r);
161
162 return 0;
163}
164
165/*
166 * Route an outgoing frame from a socket.
167 */
168int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
169 struct msghdr *msg, size_t len, int noblock)
170{
171 struct sk_buff *skb;
172 struct ipx_sock *ipxs = ipx_sk(sk);
173 struct ipx_interface *intrfc;
174 struct ipxhdr *ipx;
175 size_t size;
176 int ipx_offset;
177 struct ipx_route *rt = NULL;
178 int rc;
179
180 /* Find the appropriate interface on which to send packet */
181 if (!usipx->sipx_network && ipx_primary_net) {
182 usipx->sipx_network = ipx_primary_net->if_netnum;
183 intrfc = ipx_primary_net;
184 } else {
185 rt = ipxrtr_lookup(usipx->sipx_network);
186 rc = -ENETUNREACH;
187 if (!rt)
188 goto out;
189 intrfc = rt->ir_intrfc;
190 }
191
192 ipxitf_hold(intrfc);
193 ipx_offset = intrfc->if_ipx_offset;
194 size = sizeof(struct ipxhdr) + len + ipx_offset;
195
196 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
197 if (!skb)
198 goto out_put;
199
200 skb_reserve(skb, ipx_offset);
201 skb->sk = sk;
202
203 /* Fill in IPX header */
204 skb_reset_network_header(skb);
205 skb_reset_transport_header(skb);
206 skb_put(skb, sizeof(struct ipxhdr));
207 ipx = ipx_hdr(skb);
208 ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
209 IPX_SKB_CB(skb)->ipx_tctrl = 0;
210 ipx->ipx_type = usipx->sipx_type;
211
212 IPX_SKB_CB(skb)->last_hop.index = -1;
213#ifdef CONFIG_IPX_INTERN
214 IPX_SKB_CB(skb)->ipx_source_net = ipxs->intrfc->if_netnum;
215 memcpy(ipx->ipx_source.node, ipxs->node, IPX_NODE_LEN);
216#else
217 rc = ntohs(ipxs->port);
218 if (rc == 0x453 || rc == 0x452) {
219 /* RIP/SAP special handling for mars_nwe */
220 IPX_SKB_CB(skb)->ipx_source_net = intrfc->if_netnum;
221 memcpy(ipx->ipx_source.node, intrfc->if_node, IPX_NODE_LEN);
222 } else {
223 IPX_SKB_CB(skb)->ipx_source_net = ipxs->intrfc->if_netnum;
224 memcpy(ipx->ipx_source.node, ipxs->intrfc->if_node,
225 IPX_NODE_LEN);
226 }
227#endif /* CONFIG_IPX_INTERN */
228 ipx->ipx_source.sock = ipxs->port;
229 IPX_SKB_CB(skb)->ipx_dest_net = usipx->sipx_network;
230 memcpy(ipx->ipx_dest.node, usipx->sipx_node, IPX_NODE_LEN);
231 ipx->ipx_dest.sock = usipx->sipx_port;
232
233 rc = memcpy_from_msg(skb_put(skb, len), msg, len);
234 if (rc) {
235 kfree_skb(skb);
236 goto out_put;
237 }
238
239 /* Apply checksum. Not allowed on 802.3 links. */
240 if (sk->sk_no_check_tx ||
241 intrfc->if_dlink_type == htons(IPX_FRAME_8023))
242 ipx->ipx_checksum = htons(0xFFFF);
243 else
244 ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
245
246 rc = ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ?
247 rt->ir_router_node : ipx->ipx_dest.node);
248out_put:
249 ipxitf_put(intrfc);
250 if (rt)
251 ipxrtr_put(rt);
252out:
253 return rc;
254}
255
256/*
257 * We use a normal struct rtentry for route handling
258 */
259int ipxrtr_ioctl(unsigned int cmd, void __user *arg)
260{
261 struct rtentry rt; /* Use these to behave like 'other' stacks */
262 struct sockaddr_ipx *sg, *st;
263 int rc = -EFAULT;
264
265 if (copy_from_user(&rt, arg, sizeof(rt)))
266 goto out;
267
268 sg = (struct sockaddr_ipx *)&rt.rt_gateway;
269 st = (struct sockaddr_ipx *)&rt.rt_dst;
270
271 rc = -EINVAL;
272 if (!(rt.rt_flags & RTF_GATEWAY) || /* Direct routes are fixed */
273 sg->sipx_family != AF_IPX ||
274 st->sipx_family != AF_IPX)
275 goto out;
276
277 switch (cmd) {
278 case SIOCDELRT:
279 rc = ipxrtr_delete(st->sipx_network);
280 break;
281 case SIOCADDRT: {
282 struct ipx_route_definition f;
283 f.ipx_network = st->sipx_network;
284 f.ipx_router_network = sg->sipx_network;
285 memcpy(f.ipx_router_node, sg->sipx_node, IPX_NODE_LEN);
286 rc = ipxrtr_create(&f);
287 break;
288 }
289 }
290
291out:
292 return rc;
293}
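
/* [Editorial example -- not part of the original file.] Adding an IPX
 * route the way ipxrtr_ioctl() expects it: a struct rtentry whose
 * rt_dst and rt_gateway carry sockaddr_ipx values and whose rt_flags
 * has RTF_GATEWAY set (direct routes cannot be added or deleted here).
 * The caller needs CAP_NET_ADMIN; all addresses are invented.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/route.h>
#include <linux/ipx.h>

static int add_ipx_route(int fd, __be32 target_net, __be32 router_net,
			 const unsigned char router_node[IPX_NODE_LEN])
{
	struct rtentry rt;
	struct sockaddr_ipx *st = (struct sockaddr_ipx *)&rt.rt_dst;
	struct sockaddr_ipx *sg = (struct sockaddr_ipx *)&rt.rt_gateway;

	memset(&rt, 0, sizeof(rt));
	rt.rt_flags = RTF_GATEWAY;	/* required by ipxrtr_ioctl() */
	st->sipx_family = AF_IPX;
	st->sipx_network = target_net;
	sg->sipx_family = AF_IPX;
	sg->sipx_network = router_net;
	memcpy(sg->sipx_node, router_node, IPX_NODE_LEN);

	return ioctl(fd, SIOCADDRT, &rt);
}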
diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c
deleted file mode 100644
index ba7d4214bbff..000000000000
--- a/net/ipx/pe2.c
+++ /dev/null
@@ -1,36 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/in.h>
3#include <linux/mm.h>
4#include <linux/module.h>
5#include <linux/netdevice.h>
6#include <linux/skbuff.h>
7#include <linux/slab.h>
8
9#include <net/datalink.h>
10
11static int pEII_request(struct datalink_proto *dl,
12 struct sk_buff *skb, unsigned char *dest_node)
13{
14 struct net_device *dev = skb->dev;
15
16 skb->protocol = htons(ETH_P_IPX);
17 dev_hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len);
18 return dev_queue_xmit(skb);
19}
20
21struct datalink_proto *make_EII_client(void)
22{
23 struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
24
25 if (proto) {
26 proto->header_length = 0;
27 proto->request = pEII_request;
28 }
29
30 return proto;
31}
32
33void destroy_EII_client(struct datalink_proto *dl)
34{
35 kfree(dl);
36}
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
deleted file mode 100644
index c3eef457db88..000000000000
--- a/net/ipx/sysctl_net_ipx.c
+++ /dev/null
@@ -1,40 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/* -*- linux-c -*-
3 * sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
4 *
5 * Begun April 1, 1996, Mike Shaver.
6 * Added /proc/sys/net/ipx directory entry (empty =) ). [MS]
7 * Added /proc/sys/net/ipx/ipx_pprop_broadcasting - acme March 4, 2001
8 */
9
10#include <linux/mm.h>
11#include <linux/sysctl.h>
12#include <net/net_namespace.h>
13#include <net/ipx.h>
14
15#ifndef CONFIG_SYSCTL
16#error This file should not be compiled without CONFIG_SYSCTL defined
17#endif
18
19static struct ctl_table ipx_table[] = {
20 {
21 .procname = "ipx_pprop_broadcasting",
22 .data = &sysctl_ipx_pprop_broadcasting,
23 .maxlen = sizeof(int),
24 .mode = 0644,
25 .proc_handler = proc_dointvec,
26 },
27 { },
28};
29
30static struct ctl_table_header *ipx_table_header;
31
32void ipx_register_sysctl(void)
33{
34 ipx_table_header = register_net_sysctl(&init_net, "net/ipx", ipx_table);
35}
36
37void ipx_unregister_sysctl(void)
38{
39 unregister_net_sysctl_table(ipx_table_header);
40}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 148533169b1d..1e8cc7bcbca3 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1474,7 +1474,7 @@ done:
 	return copied;
 }
 
-static inline unsigned int iucv_accept_poll(struct sock *parent)
+static inline __poll_t iucv_accept_poll(struct sock *parent)
 {
 	struct iucv_sock *isk, *n;
 	struct sock *sk;
@@ -1483,17 +1483,17 @@ static inline unsigned int iucv_accept_poll(struct sock *parent)
 		sk = (struct sock *) isk;
 
 		if (sk->sk_state == IUCV_CONNECTED)
-			return POLLIN | POLLRDNORM;
+			return EPOLLIN | EPOLLRDNORM;
 	}
 
 	return 0;
 }
 
-unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait)
+__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
+			poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	unsigned int mask = 0;
+	__poll_t mask = 0;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 
@@ -1501,27 +1501,27 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 		return iucv_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR |
-			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+		mask |= EPOLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= EPOLLRDHUP;
 
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		mask |= POLLHUP;
+		mask |= EPOLLHUP;
 
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sk->sk_shutdown & RCV_SHUTDOWN))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	if (sk->sk_state == IUCV_CLOSED)
-		mask |= POLLHUP;
+		mask |= EPOLLHUP;
 
 	if (sk->sk_state == IUCV_DISCONN)
-		mask |= POLLIN;
+		mask |= EPOLLIN;
 
 	if (sock_writeable(sk) && iucv_below_msglim(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 	else
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index bd5723315069..9d5649e4e8b7 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -247,7 +247,6 @@ static int kcm_seq_show(struct seq_file *seq, void *v)
 }
 
 static const struct file_operations kcm_seq_fops = {
-	.owner = THIS_MODULE,
 	.open = kcm_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -397,7 +396,6 @@ static int kcm_stats_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations kcm_stats_seq_fops = {
-	.owner = THIS_MODULE,
 	.open = kcm_stats_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 4a8d407f8902..f297d53a11aa 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -396,8 +396,8 @@ static int kcm_read_sock_done(struct strparser *strp, int err)
 
 static void psock_state_change(struct sock *sk)
 {
-	/* TCP only does a POLLIN for a half close. Do a POLLHUP here
-	 * since application will normally not poll with POLLIN
+	/* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here
+	 * since application will normally not poll with EPOLLIN
 	 * on the TCP sockets.
 	 */
 
@@ -1338,7 +1338,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
 
 	/* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
 	 * we set sk_state, otherwise epoll_wait always returns right away with
-	 * POLLHUP
+	 * EPOLLHUP
 	 */
 	kcm->sk.sk_state = TCP_ESTABLISHED;
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 115918ad8eca..194a7483bb93 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -662,10 +662,9 @@ discard:
  *  |x|S|x|x|x|x|x|x|           Sequence Number                     |
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  *
- * Cookie value, sublayer format and offset (pad) are negotiated with
- * the peer when the session is set up. Unlike L2TPv2, we do not need
- * to parse the packet header to determine if optional fields are
- * present.
+ * Cookie value and sublayer format are negotiated with the peer when
+ * the session is set up. Unlike L2TPv2, we do not need to parse the
+ * packet header to determine if optional fields are present.
  *
  * Caller must already have parsed the frame and determined that it is
  * a data (not control) frame before coming here. Fields up to the
@@ -731,11 +730,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 				 "%s: recv data ns=%u, session nr=%u\n",
 				 session->name, ns, session->nr);
 		}
+		ptr += 4;
 	}
 
-	/* Advance past L2-specific header, if present */
-	ptr += session->l2specific_len;
-
 	if (L2TP_SKB_CB(skb)->has_seq) {
 		/* Received a packet with sequence numbers. If we're the LNS,
 		 * check if we are sending sequence numbers and if not,
@@ -780,10 +777,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 		}
 	}
 
-	/* Session data offset is handled differently for L2TPv2 and
-	 * L2TPv3. For L2TPv2, there is an optional 16-bit value in
-	 * the header. For L2TPv3, the offset is negotiated using AVPs
-	 * in the session setup control protocol.
+	/* Session data offset is defined only for L2TPv2 and is
+	 * indicated by an optional 16-bit value in the header.
 	 */
 	if (tunnel->version == L2TP_HDR_VER_2) {
 		/* If offset bit set, skip it. */
@@ -791,8 +786,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			offset = ntohs(*(__be16 *)ptr);
 			ptr += 2 + offset;
 		}
-	} else
-		ptr += session->offset;
+	}
 
 	offset = ptr - optr;
 	if (!pskb_may_pull(skb, offset))
@@ -1052,24 +1046,21 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
 		memcpy(bufp, &session->cookie[0], session->cookie_len);
 		bufp += session->cookie_len;
 	}
-	if (session->l2specific_len) {
-		if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
-			u32 l2h = 0;
-			if (session->send_seq) {
-				l2h = 0x40000000 | session->ns;
-				session->ns++;
-				session->ns &= 0xffffff;
-				l2tp_dbg(session, L2TP_MSG_SEQ,
-					 "%s: updated ns to %u\n",
-					 session->name, session->ns);
-			}
+	if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
+		u32 l2h = 0;
 
-			*((__be32 *) bufp) = htonl(l2h);
+		if (session->send_seq) {
+			l2h = 0x40000000 | session->ns;
+			session->ns++;
+			session->ns &= 0xffffff;
+			l2tp_dbg(session, L2TP_MSG_SEQ,
+				 "%s: updated ns to %u\n",
+				 session->name, session->ns);
 		}
-		bufp += session->l2specific_len;
+
+		*((__be32 *)bufp) = htonl(l2h);
+		bufp += 4;
 	}
-	if (session->offset)
-		bufp += session->offset;
 
 	return bufp - optr;
 }
@@ -1725,7 +1716,7 @@ int l2tp_session_delete(struct l2tp_session *session)
 EXPORT_SYMBOL_GPL(l2tp_session_delete);
 
 /* We come here whenever a session's send_seq, cookie_len or
- * l2specific_len parameters are set.
+ * l2specific_type parameters are set.
  */
 void l2tp_session_set_header_len(struct l2tp_session *session, int version)
 {
@@ -1734,7 +1725,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
 		if (session->send_seq)
 			session->hdr_len += 4;
 	} else {
-		session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+		session->hdr_len = 4 + session->cookie_len;
+		session->hdr_len += l2tp_get_l2specific_len(session);
 		if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
 			session->hdr_len += 4;
 	}
@@ -1784,9 +1776,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 	session->recv_seq = cfg->recv_seq;
 	session->lns_mode = cfg->lns_mode;
 	session->reorder_timeout = cfg->reorder_timeout;
-	session->offset = cfg->offset;
 	session->l2specific_type = cfg->l2specific_type;
-	session->l2specific_len = cfg->l2specific_len;
 	session->cookie_len = cfg->cookie_len;
 	memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
 	session->peer_cookie_len = cfg->peer_cookie_len;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9534e16965cc..9bbee90e9963 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -59,8 +59,6 @@ struct l2tp_session_cfg {
 	int			debug;		/* bitmask of debug message
 						 * categories */
 	u16			vlan_id;	/* VLAN pseudowire only */
-	u16			offset;		/* offset to payload */
-	u16			l2specific_len;	/* Layer 2 specific length */
 	u16			l2specific_type; /* Layer 2 specific type */
 	u8			cookie[8];	/* optional cookie */
 	int			cookie_len;	/* 0, 4 or 8 bytes */
@@ -86,9 +84,6 @@ struct l2tp_session {
 	int			cookie_len;
 	u8			peer_cookie[8];
 	int			peer_cookie_len;
-	u16			offset;		/* offset from end of L2TP header
-						   to beginning of data */
-	u16			l2specific_len;
 	u16			l2specific_type;
 	u16			hdr_len;
 	u32			nr;		/* session NR state (receive) */
@@ -305,6 +300,17 @@ static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
 		l2tp_session_free(session);
 }
 
+static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
+{
+	switch (session->l2specific_type) {
+	case L2TP_L2SPECTYPE_DEFAULT:
+		return 4;
+	case L2TP_L2SPECTYPE_NONE:
+	default:
+		return 0;
+	}
+}
+
 #define l2tp_printk(ptr, type, func, fmt, ...)				\
 do {									\
 	if (((ptr)->debug) & (type))					\
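
[Editorial sketch -- not part of the patch.] The "default" L2-specific
sublayer that the new l2tp_get_l2specific_len() helper sizes is a single
32-bit word; l2tp_build_l2tpv3_header() builds it as 0x40000000 | ns,
i.e. bit 30 is the S (sequence) flag and the low 24 bits carry the send
sequence number. A matching host-order reader, with invented names, for
illustration only:

	#include <stdbool.h>
	#include <stdint.h>

	struct l2spec_default {
		bool has_seq;		/* S bit set? */
		uint32_t ns;		/* 24-bit sequence number */
	};

	/* 'word' must already be converted from network byte order */
	static struct l2spec_default parse_l2spec_default(uint32_t word)
	{
		struct l2spec_default s;

		s.has_seq = (word & 0x40000000) != 0;
		s.ns = word & 0xffffff;
		return s;
	}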
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index eb69411bcb47..72e713da4733 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -180,8 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 		   session->lns_mode ? "LNS" : "LAC",
 		   session->debug,
 		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   offset %hu l2specific %hu/%hu\n",
-		   session->offset, session->l2specific_type, session->l2specific_len);
+	seq_printf(m, "   offset 0 l2specific %hu/%hu\n",
+		   session->l2specific_type, l2tp_get_l2specific_len(session));
 	if (session->cookie_len) {
 		seq_printf(m, "   cookie %02x%02x%02x%02x",
 			   session->cookie[0], session->cookie[1],
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a1f24fb2be98..e7ea9c4b89ff 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -547,19 +547,19 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	}
 
 	if (tunnel->version > 2) {
-		if (info->attrs[L2TP_ATTR_OFFSET])
-			cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
-
 		if (info->attrs[L2TP_ATTR_DATA_SEQ])
 			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
 
-		cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
-		if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
+		if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) {
 			cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
-
-		cfg.l2specific_len = 4;
-		if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
-			cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
+			if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT &&
+			    cfg.l2specific_type != L2TP_L2SPECTYPE_NONE) {
+				ret = -EINVAL;
+				goto out_tunnel;
+			}
+		} else {
+			cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
+		}
 
 		if (info->attrs[L2TP_ATTR_COOKIE]) {
 			u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
@@ -620,27 +620,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		goto out_tunnel;
 	}
 
-	/* Check that pseudowire-specific params are present */
-	switch (cfg.pw_type) {
-	case L2TP_PWTYPE_NONE:
-		break;
-	case L2TP_PWTYPE_ETH_VLAN:
-		if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
-			ret = -EINVAL;
-			goto out_tunnel;
-		}
-		break;
-	case L2TP_PWTYPE_ETH:
-		break;
-	case L2TP_PWTYPE_PPP:
-	case L2TP_PWTYPE_PPP_AC:
-		break;
-	case L2TP_PWTYPE_IP:
-	default:
-		ret = -EPROTONOSUPPORT;
-		break;
-	}
-
 	ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel,
 							   session_id,
 							   peer_session_id,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index b412fc3351dc..59f246d7b290 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1734,7 +1734,6 @@ static int pppol2tp_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations pppol2tp_proc_fops = {
-	.owner = THIS_MODULE,
 	.open = pppol2tp_proc_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 29c509c54bb2..66821e8a2b7a 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -225,7 +225,6 @@ static int llc_seq_core_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations llc_seq_socket_fops = {
-	.owner = THIS_MODULE,
 	.open = llc_seq_socket_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -233,7 +232,6 @@ static const struct file_operations llc_seq_socket_fops = {
 };
 
 static const struct file_operations llc_seq_core_fops = {
-	.owner = THIS_MODULE,
 	.open = llc_seq_core_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index d444752dbf40..a8b1616cec41 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -153,27 +153,16 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session);
  */
 static void sta_rx_agg_session_timer_expired(struct timer_list *t)
 {
-	struct tid_ampdu_rx *tid_rx_timer =
-		from_timer(tid_rx_timer, t, session_timer);
-	struct sta_info *sta = tid_rx_timer->sta;
-	u8 tid = tid_rx_timer->tid;
-	struct tid_ampdu_rx *tid_rx;
+	struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, session_timer);
+	struct sta_info *sta = tid_rx->sta;
+	u8 tid = tid_rx->tid;
 	unsigned long timeout;
 
-	rcu_read_lock();
-	tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
-	if (!tid_rx) {
-		rcu_read_unlock();
-		return;
-	}
-
 	timeout = tid_rx->last_rx + TU_TO_JIFFIES(tid_rx->timeout);
 	if (time_is_after_jiffies(timeout)) {
 		mod_timer(&tid_rx->session_timer, timeout);
-		rcu_read_unlock();
 		return;
 	}
-	rcu_read_unlock();
 
 	ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n",
 	       sta->sta.addr, tid);
@@ -415,10 +404,11 @@ end:
 			timeout);
 }
 
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
-				     u8 dialog_token, u16 timeout,
-				     u16 start_seq_num, u16 ba_policy, u16 tid,
-				     u16 buf_size, bool tx, bool auto_seq)
+static void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+					    u8 dialog_token, u16 timeout,
+					    u16 start_seq_num, u16 ba_policy,
+					    u16 tid, u16 buf_size, bool tx,
+					    bool auto_seq)
 {
 	mutex_lock(&sta->ampdu_mlme.mtx);
 	___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 5f8ab5be369f..595c662a61e8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -392,7 +392,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 	 * telling the driver. New packets will not go through since
 	 * the aggregation session is no longer OPERATIONAL.
 	 */
-	synchronize_net();
+	if (!local->in_reconfig)
+		synchronize_net();
 
 	tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ?
 					WLAN_BACK_RECIPIENT :
@@ -429,18 +430,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
  */
 static void sta_addba_resp_timer_expired(struct timer_list *t)
 {
-	struct tid_ampdu_tx *tid_tx_timer =
-		from_timer(tid_tx_timer, t, addba_resp_timer);
-	struct sta_info *sta = tid_tx_timer->sta;
-	u8 tid = tid_tx_timer->tid;
-	struct tid_ampdu_tx *tid_tx;
+	struct tid_ampdu_tx *tid_tx = from_timer(tid_tx, t, addba_resp_timer);
+	struct sta_info *sta = tid_tx->sta;
+	u8 tid = tid_tx->tid;
 
 	/* check if the TID waits for addBA response */
-	rcu_read_lock();
-	tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
-	if (!tid_tx ||
-	    test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
-		rcu_read_unlock();
+	if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
 		ht_dbg(sta->sdata,
 		       "timer expired on %pM tid %d not expecting addBA response\n",
 		       sta->sta.addr, tid);
@@ -451,7 +446,6 @@ static void sta_addba_resp_timer_expired(struct timer_list *t)
 	       sta->sta.addr, tid);
 
 	ieee80211_stop_tx_ba_session(&sta->sta, tid);
-	rcu_read_unlock();
 }
 
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
@@ -529,29 +523,21 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
  */
 static void sta_tx_agg_session_timer_expired(struct timer_list *t)
 {
-	struct tid_ampdu_tx *tid_tx_timer =
-		from_timer(tid_tx_timer, t, session_timer);
-	struct sta_info *sta = tid_tx_timer->sta;
-	u8 tid = tid_tx_timer->tid;
-	struct tid_ampdu_tx *tid_tx;
+	struct tid_ampdu_tx *tid_tx = from_timer(tid_tx, t, session_timer);
+	struct sta_info *sta = tid_tx->sta;
+	u8 tid = tid_tx->tid;
 	unsigned long timeout;
 
-	rcu_read_lock();
-	tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
-	if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
-		rcu_read_unlock();
+	if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
 		return;
 	}
 
 	timeout = tid_tx->last_tx + TU_TO_JIFFIES(tid_tx->timeout);
 	if (time_is_after_jiffies(timeout)) {
 		mod_timer(&tid_tx->session_timer, timeout);
-		rcu_read_unlock();
 		return;
 	}
 
-	rcu_read_unlock();
-
 	ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n",
 	       sta->sta.addr, tid);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fb15d3b97cb2..46028e12e216 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -573,10 +573,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
573 case WLAN_CIPHER_SUITE_BIP_CMAC_256: 573 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
574 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != 574 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
575 offsetof(typeof(kseq), aes_cmac)); 575 offsetof(typeof(kseq), aes_cmac));
576 /* fall through */
576 case WLAN_CIPHER_SUITE_BIP_GMAC_128: 577 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
577 case WLAN_CIPHER_SUITE_BIP_GMAC_256: 578 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
578 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != 579 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
579 offsetof(typeof(kseq), aes_gmac)); 580 offsetof(typeof(kseq), aes_gmac));
581 /* fall through */
580 case WLAN_CIPHER_SUITE_GCMP: 582 case WLAN_CIPHER_SUITE_GCMP:
581 case WLAN_CIPHER_SUITE_GCMP_256: 583 case WLAN_CIPHER_SUITE_GCMP_256:
582 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != 584 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
@@ -2205,6 +2207,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
2205 * for now fall through to allow scanning only when 2207 * for now fall through to allow scanning only when
2206 * beaconing hasn't been configured yet 2208 * beaconing hasn't been configured yet
2207 */ 2209 */
2210 /* fall through */
2208 case NL80211_IFTYPE_AP: 2211 case NL80211_IFTYPE_AP:
2209 /* 2212 /*
2210 * If the scan has been forced (and the driver supports 2213 * If the scan has been forced (and the driver supports
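The /* fall through */ comments added here (and in several later hunks across mac80211) are not cosmetic: with gcc's -Wimplicit-fallthrough, a comment matching the expected pattern placed before the next case label suppresses the warning and documents that the fallthrough is intentional. A small self-contained sketch of the idiom; the flag values and function are made up for illustration:

/* Build with: gcc -Wimplicit-fallthrough example.c */
#include <stdio.h>

static int flags_for_width(int width)
{
	int flags = 0;

	switch (width) {
	case 20:
		flags |= 1;	/* e.g. DISABLE_HT */
		/* fall through */
	case 40:
		flags |= 2;	/* e.g. DISABLE_40MHZ */
		break;
	default:
		break;
	}
	return flags;
}

int main(void)
{
	printf("%d\n", flags_for_width(20));	/* 3: both flags accumulate */
	return 0;
}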
@@ -2373,10 +2376,17 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
 	struct ieee80211_sub_if_data *sdata;
 	enum nl80211_tx_power_setting txp_type = type;
 	bool update_txp_type = false;
+	bool has_monitor = false;

 	if (wdev) {
 		sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);

+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+			sdata = rtnl_dereference(local->monitor_sdata);
+			if (!sdata)
+				return -EOPNOTSUPP;
+		}
+
 		switch (type) {
 		case NL80211_TX_POWER_AUTOMATIC:
 			sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
@@ -2415,15 +2425,34 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,

 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+			has_monitor = true;
+			continue;
+		}
 		sdata->user_power_level = local->user_power_level;
 		if (txp_type != sdata->vif.bss_conf.txpower_type)
 			update_txp_type = true;
 		sdata->vif.bss_conf.txpower_type = txp_type;
 	}
-	list_for_each_entry(sdata, &local->interfaces, list)
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+			continue;
 		ieee80211_recalc_txpower(sdata, update_txp_type);
+	}
 	mutex_unlock(&local->iflist_mtx);

+	if (has_monitor) {
+		sdata = rtnl_dereference(local->monitor_sdata);
+		if (sdata) {
+			sdata->user_power_level = local->user_power_level;
+			if (txp_type != sdata->vif.bss_conf.txpower_type)
+				update_txp_type = true;
+			sdata->vif.bss_conf.txpower_type = txp_type;
+
+			ieee80211_recalc_txpower(sdata, update_txp_type);
+		}
+	}
+
 	return 0;
 }

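These tx-power hunks take the virtual monitor interface out of the regular interface walk and handle it once afterwards, since local->monitor_sdata is a single RTNL-protected instance rather than an iflist member. A toy sketch of that skip-and-handle-later control flow, with invented types standing in for the mac80211 structures:

#include <stdbool.h>
#include <stdio.h>

enum iftype { IFTYPE_STATION, IFTYPE_MONITOR };

struct iface { enum iftype type; int power; };

int main(void)
{
	struct iface list[] = {
		{ IFTYPE_STATION, 0 }, { IFTYPE_MONITOR, 0 }, { IFTYPE_STATION, 0 },
	};
	struct iface monitor = { IFTYPE_MONITOR, 0 };	/* stands in for monitor_sdata */
	bool has_monitor = false;
	int i;

	for (i = 0; i < 3; i++) {
		if (list[i].type == IFTYPE_MONITOR) {
			has_monitor = true;	/* defer: handled outside the walk */
			continue;
		}
		list[i].power = 20;
	}
	if (has_monitor)
		monitor.power = 20;		/* the one virtual monitor, updated once */

	printf("monitor power %d\n", monitor.power);
	return 0;
}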
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 5fae001f286c..1f466d12a6bc 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -211,6 +211,7 @@ static const char *hw_flag_names[] = {
 	FLAG(TX_FRAG_LIST),
 	FLAG(REPORTS_LOW_ACK),
 	FLAG(SUPPORTS_TX_FRAG),
+	FLAG(SUPPORTS_TDLS_BUFFER_STA),
 #undef FLAG
 };

diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b15412c21ac9..444ea8d127fe 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -420,7 +420,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
 	default:
 		p += scnprintf(p, sizeof(buf) + buf - p,
 			       "\t\tMAX-MPDU-UNKNOWN\n");
-	};
+	}
 	switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
 	case 0:
 		p += scnprintf(p, sizeof(buf) + buf - p,
@@ -438,7 +438,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
 		p += scnprintf(p, sizeof(buf) + buf - p,
 			       "\t\tUNKNOWN-MHZ: 0x%x\n",
 			       (vhtc->cap >> 2) & 0x3);
-	};
+	}
 	PFLAG(RXLDPC, "RXLDPC");
 	PFLAG(SHORT_GI_80, "SHORT-GI-80");
 	PFLAG(SHORT_GI_160, "SHORT-GI-160");
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c7f93fd9ca7a..4d82fe7d627c 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -165,7 +165,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 	if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
 			 sdata->vif.type == NL80211_IFTYPE_NAN ||
 			 (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
-			  !sdata->vif.mu_mimo_owner)))
+			  !sdata->vif.mu_mimo_owner &&
+			  !(changed & BSS_CHANGED_TXPOWER))))
 		return;

 	if (!check_sdata_in_driver(sdata))
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 1621b6ab17ba..d7523530d3f8 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -492,6 +492,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 	case IEEE80211_SMPS_AUTOMATIC:
 	case IEEE80211_SMPS_NUM_MODES:
 		WARN_ON(1);
+		/* fall through */
 	case IEEE80211_SMPS_OFF:
 		action_frame->u.action.u.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_DISABLED;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 885d00b41911..26900025de2f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1757,10 +1757,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 				     u16 initiator, u16 reason, bool stop);
 void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 				    u16 initiator, u16 reason, bool stop);
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
-				     u8 dialog_token, u16 timeout,
-				     u16 start_seq_num, u16 ba_policy, u16 tid,
-				     u16 buf_size, bool tx, bool auto_seq);
 void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 				      u8 dialog_token, u16 timeout,
 				      u16 start_seq_num, u16 ba_policy, u16 tid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 13b16f90e1cf..5fe01f82df12 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1474,7 +1474,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
-		BUG();
+		WARN_ON(1);
 		break;
 	}

@@ -1633,7 +1633,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
 			goto out_unlock;
 		}
 	}
-	/* otherwise fall through */
+	/* fall through */
 	default:
 		/* assign a new address if possible -- try n_addresses first */
 		for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 938049395f90..aee05ec3f7ea 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -178,13 +178,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 	if (!ret) {
 		key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;

-		if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+		if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+					  IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
 		      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
 			decrease_tailroom_need_count(sdata, 1);

 		WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
 			(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV));

+		WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) &&
+			(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC));
+
 		return 0;
 	}

@@ -237,7 +241,8 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
 	sta = key->sta;
 	sdata = key->sdata;

-	if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+	if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+				  IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
 	      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
 		increment_tailroom_need_count(sdata);

@@ -1104,7 +1109,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
 	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 		key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;

-		if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+		if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+					  IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
 		      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
 			increment_tailroom_need_count(key->sdata);
 	}
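The key.c hunks fold the new IEEE80211_KEY_FLAG_PUT_MIC_SPACE into the same tailroom accounting as GENERATE_MMIC, so either flag keeps skb tailroom reserved for the Michael MIC. A hedged sketch of the predicate being extended; the flag values below are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

#define KEY_FLAG_GENERATE_MMIC    0x1
#define KEY_FLAG_PUT_MIC_SPACE    0x2
#define KEY_FLAG_RESERVE_TAILROOM 0x4

/* true when no tailroom bookkeeping is needed for this key */
static bool skip_tailroom_count(unsigned int flags)
{
	return !((flags & (KEY_FLAG_GENERATE_MMIC | KEY_FLAG_PUT_MIC_SPACE)) ||
		 (flags & KEY_FLAG_RESERVE_TAILROOM));
}

int main(void)
{
	printf("%d\n", skip_tailroom_count(0));                      /* 1: nothing needed */
	printf("%d\n", skip_tailroom_count(KEY_FLAG_PUT_MIC_SPACE)); /* 0: MIC space counts now */
	return 0;
}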
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e054a2fd8d38..0785d04a80bc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -263,6 +263,9 @@ static void ieee80211_restart_work(struct work_struct *work)
 	flush_delayed_work(&local->roc_work);
 	flush_work(&local->hw_roc_done);

+	/* wait for all packet processing to be done */
+	synchronize_net();
+
 	ieee80211_reconfig(local);
 	rtnl_unlock();
 }
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5e27364e10ac..73ac607beb5d 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -989,8 +989,10 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 	switch (sdata->vif.bss_conf.chandef.width) {
 	case NL80211_CHAN_WIDTH_20_NOHT:
 		sta_flags |= IEEE80211_STA_DISABLE_HT;
+		/* fall through */
 	case NL80211_CHAN_WIDTH_20:
 		sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+		/* fall through */
 	case NL80211_CHAN_WIDTH_40:
 		sta_flags |= IEEE80211_STA_DISABLE_VHT;
 		break;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 4394463a0c2e..35ad3983ae4b 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1250,6 +1250,7 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata)
 		break;
 	case IEEE80211_PROACTIVE_PREQ_WITH_PREP:
 		flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG;
+		/* fall through */
 	case IEEE80211_PROACTIVE_PREQ_NO_PREP:
 		interval = ifmsh->mshcfg.dot11MeshHWMPactivePathToRootTimeout;
 		target_flags |= IEEE80211_PREQ_TO_FLAG |
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 86c8dfef56a4..a5125624a76d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -257,9 +257,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
 	if (ret)
 		return NULL;

-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto err;
+	rhashtable_walk_start(&iter);

 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -269,7 +267,6 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
 		if (i++ == idx)
 			break;
 	}
-err:
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);

@@ -513,9 +510,7 @@ void mesh_plink_broken(struct sta_info *sta)
 	if (ret)
 		return;

-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);

 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -535,7 +530,6 @@ void mesh_plink_broken(struct sta_info *sta)
 			    WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
 		}
 	}
-out:
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -584,9 +578,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
 	if (ret)
 		return;

-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);

 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -597,7 +589,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
 		if (rcu_access_pointer(mpath->next_hop) == sta)
 			__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -614,9 +606,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
 	if (ret)
 		return;

-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);

 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -627,7 +617,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
 		if (ether_addr_equal(mpath->mpp, proxy))
 			__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -642,9 +632,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
 	if (ret)
 		return;

-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);

 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -653,7 +641,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
 			break;
 		__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -873,9 +861,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
 	if (ret)
 		return;

-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);

 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -887,7 +873,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
 		    time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
 			__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
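These mesh_pathtbl.c hunks track an rhashtable API change: rhashtable_walk_start() no longer returns an error, and a concurrent resize is instead reported as an -EAGAIN marker from rhashtable_walk_next(), which the loops already tolerate, so all the error-path gotos could go. A userspace model of that iterator contract (the EAGAIN marker and walk functions are stand-ins, not the real API):

#include <stdio.h>

#define EAGAIN_MARK ((void *)-1)

static void *items[] = { "a", EAGAIN_MARK, "b", NULL };
static int pos;

static void walk_start(void) { pos = 0; }	/* cannot fail anymore */
static void *walk_next(void) { return items[pos++]; }

int main(void)
{
	void *it;

	walk_start();
	while ((it = walk_next())) {
		if (it == EAGAIN_MARK)
			continue;	/* table resized mid-walk: just skip */
		printf("%s\n", (char *)it);
	}
	return 0;
}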
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index e2d00cce3c17..0f6c9ca59062 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -672,7 +672,7 @@ void mesh_plink_timer(struct timer_list *t)
 			break;
 		}
 		reason = WLAN_REASON_MESH_MAX_RETRIES;
-		/* fall through on else */
+		/* fall through */
 	case NL80211_PLINK_CNF_RCVD:
 		/* confirm timer */
 		if (!reason)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c244691deab9..39b660b9a908 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -473,6 +473,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
 	case IEEE80211_SMPS_AUTOMATIC:
 	case IEEE80211_SMPS_NUM_MODES:
 		WARN_ON(1);
+		/* fall through */
 	case IEEE80211_SMPS_OFF:
 		cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
 			IEEE80211_HT_CAP_SM_PS_SHIFT;
@@ -2861,10 +2862,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
 	capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);

-	if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14)))
-		sdata_info(sdata, "invalid AID value 0x%x; bits 15:14 not set\n",
-			   aid);
-	aid &= ~(BIT(15) | BIT(14));
+	/*
+	 * The 5 MSB of the AID field are reserved
+	 * (802.11-2016 9.4.1.8 AID field)
+	 */
+	aid &= 0x7ff;

 	ifmgd->broken_ap = false;

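For the AID change, a worked example of why masking with 0x7ff differs from only clearing bits 15:14 — 802.11-2016 9.4.1.8 reserves the top five bits of the AID field, so bits 13:11 must be cleared as well:

#include <stdio.h>

int main(void)
{
	unsigned int raw = 0xf923;	/* AID with several reserved bits set */

	printf("0x%x\n", raw & ~(0x8000 | 0x4000));	/* old: 0x3923, bits 13:11 survive */
	printf("0x%x\n", raw & 0x7ff);			/* new: 0x123, all 5 MSBs cleared */
	return 0;
}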
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index faf4f6055000..f1d40b6645ff 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -801,14 +801,14 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	case NL80211_IFTYPE_ADHOC:
 		if (!sdata->vif.bss_conf.ibss_joined)
 			need_offchan = true;
-		/* fall through */
 #ifdef CONFIG_MAC80211_MESH
+		/* fall through */
 	case NL80211_IFTYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif) &&
 		    !sdata->u.mesh.mesh_id_len)
 			need_offchan = true;
-		/* fall through */
 #endif
+		/* fall through */
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4daafb07602f..fd580614085b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1607,23 +1607,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)

 	/*
 	 * Change STA power saving mode only at the end of a frame
-	 * exchange sequence.
+	 * exchange sequence, and only for a data or management
+	 * frame as specified in IEEE 802.11-2016 11.2.3.2
 	 */
 	if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
 	    !ieee80211_has_morefrags(hdr->frame_control) &&
-	    !ieee80211_is_back_req(hdr->frame_control) &&
+	    (ieee80211_is_mgmt(hdr->frame_control) ||
+	     ieee80211_is_data(hdr->frame_control)) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
 	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
-	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
-	    /*
-	     * PM bit is only checked in frames where it isn't reserved,
-	     * in AP mode it's reserved in non-bufferable management frames
-	     * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
-	     * BAR frames should be ignored as specified in
-	     * IEEE 802.11-2012 10.2.1.2.
-	     */
-	    (!ieee80211_is_mgmt(hdr->frame_control) ||
-	     ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
+	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
 		if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
 			if (!ieee80211_has_pm(hdr->frame_control))
 				sta_ps_end(sta);
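The rewritten power-save gate honours the PM bit only on data and management frames, which subsumes the old special cases for BAR frames and non-bufferable management frames. A compact sketch of the new predicate, with an invented frame-type enum standing in for the frame_control tests:

#include <stdbool.h>
#include <stdio.h>

enum ftype { FRAME_DATA, FRAME_MGMT, FRAME_CTRL_BAR };

static bool may_change_ps(enum ftype t, bool morefrags)
{
	/* mirrors 802.11-2016 11.2.3.2: data or mgmt, end of exchange */
	return !morefrags && (t == FRAME_DATA || t == FRAME_MGMT);
}

int main(void)
{
	printf("%d\n", may_change_ps(FRAME_DATA, false));	/* 1 */
	printf("%d\n", may_change_ps(FRAME_CTRL_BAR, false));	/* 0: control frames ignored */
	return 0;
}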
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 91093d4a2f84..5cd5e6e5834e 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -47,6 +47,8 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
 			  NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
 	bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
 			  !ifmgd->tdls_wider_bw_prohibited;
+	bool buffer_sta = ieee80211_hw_check(&local->hw,
+					     SUPPORTS_TDLS_BUFFER_STA);
 	struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata);
 	bool vht = sband && sband->vht_cap.vht_supported;
 	u8 *pos = skb_put(skb, 10);
@@ -56,7 +58,8 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
 	*pos++ = 0x0;
 	*pos++ = 0x0;
 	*pos++ = 0x0;
-	*pos++ = chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0;
+	*pos++ = (chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0) |
+		 (buffer_sta ? WLAN_EXT_CAPA4_TDLS_BUFFER_STA : 0);
 	*pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
 	*pos++ = 0;
 	*pos++ = 0;
@@ -236,6 +239,7 @@ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac)
 	switch (ac) {
 	default:
 		WARN_ON_ONCE(1);
+		/* fall through */
 	case 0:
 		return IEEE80211_AC_BE;
 	case 1:
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3160954fc406..25904af38839 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2922,7 +2922,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)

 		gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
 		iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
-		mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC;
+		mmic = build.key->conf.flags &
+			(IEEE80211_KEY_FLAG_GENERATE_MMIC |
+			 IEEE80211_KEY_FLAG_PUT_MIC_SPACE);

 		/* don't handle software crypto */
 		if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d57e5f6bd8b6..1f82191ce601 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2110,15 +2110,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0);

  wake_up:
-	if (local->in_reconfig) {
-		local->in_reconfig = false;
-		barrier();
-
-		/* Restart deferred ROCs */
-		mutex_lock(&local->mtx);
-		ieee80211_start_next_roc(local);
-		mutex_unlock(&local->mtx);
-	}

 	if (local->monitors == local->open_count && local->monitors > 0)
 		ieee80211_add_virtual_monitor(local);
@@ -2146,6 +2137,16 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		mutex_unlock(&local->sta_mtx);
 	}

+	if (local->in_reconfig) {
+		local->in_reconfig = false;
+		barrier();
+
+		/* Restart deferred ROCs */
+		mutex_lock(&local->mtx);
+		ieee80211_start_next_roc(local);
+		mutex_unlock(&local->mtx);
+	}
+
 	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_SUSPEND,
 					false);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 3e3d3014e9ab..5f7c96368b11 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -165,6 +165,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
 			qos = sta->sta.wme;
 		break;
 	}
+	/* fall through */
 	case NL80211_IFTYPE_AP:
 		ra = skb->data;
 		break;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index b58722d9de37..785056cb76f6 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,7 +1,7 @@
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
  * Copyright 2008, Jouni Malinen <j@w1.fi>
- * Copyright (C) 2016 Intel Deutschland GmbH
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -59,8 +59,9 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 	if (info->control.hw_key &&
 	    (info->flags & IEEE80211_TX_CTL_DONTFRAG ||
 	     ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) &&
-	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
-		/* hwaccel - with no need for SW-generated MMIC */
+	    !(tx->key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+				     IEEE80211_KEY_FLAG_PUT_MIC_SPACE))) {
+		/* hwaccel - with no need for SW-generated MMIC or MIC space */
 		return TX_CONTINUE;
 	}

@@ -75,8 +76,15 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 			    skb_tailroom(skb), tail))
 		return TX_DROP;

-	key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
 	mic = skb_put(skb, MICHAEL_MIC_LEN);
+
+	if (tx->key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) {
+		/* Zeroed MIC can help with debug */
+		memset(mic, 0, MICHAEL_MIC_LEN);
+		return TX_CONTINUE;
+	}
+
+	key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
 	michael_mic(key, hdr, data, data_len, mic);
 	if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE))
 		mic[0]++;
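With PUT_MIC_SPACE, the driver or device fills in the Michael MIC itself, so mac80211 only reserves (and zeroes) the 8 tail bytes instead of computing the MIC in software. A simplified sketch of that path; buffer management is reduced to a plain array and all names besides MICHAEL_MIC_LEN are invented:

#include <stdio.h>
#include <string.h>

#define MICHAEL_MIC_LEN 8

static size_t add_mic_space(unsigned char *frame, size_t len)
{
	memset(frame + len, 0, MICHAEL_MIC_LEN);	/* zeroed MIC aids debugging */
	return len + MICHAEL_MIC_LEN;			/* device overwrites it later */
}

int main(void)
{
	unsigned char frame[64] = "payload";
	size_t len = add_mic_space(frame, 7);

	printf("frame now %zu bytes, last 8 zeroed\n", len);
	return 0;
}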
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8ca9915befc8..e545a3c9365f 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -8,6 +8,7 @@
 #include <linux/ipv6.h>
 #include <linux/mpls.h>
 #include <linux/netconf.h>
+#include <linux/nospec.h>
 #include <linux/vmalloc.h>
 #include <linux/percpu.h>
 #include <net/ip.h>
@@ -935,24 +936,27 @@ errout:
 	return err;
 }

-static bool mpls_label_ok(struct net *net, unsigned int index,
+static bool mpls_label_ok(struct net *net, unsigned int *index,
 			  struct netlink_ext_ack *extack)
 {
+	bool is_ok = true;
+
 	/* Reserved labels may not be set */
-	if (index < MPLS_LABEL_FIRST_UNRESERVED) {
+	if (*index < MPLS_LABEL_FIRST_UNRESERVED) {
 		NL_SET_ERR_MSG(extack,
 			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
-		return false;
+		is_ok = false;
 	}

 	/* The full 20 bit range may not be supported. */
-	if (index >= net->mpls.platform_labels) {
+	if (is_ok && *index >= net->mpls.platform_labels) {
 		NL_SET_ERR_MSG(extack,
 			       "Label >= configured maximum in platform_labels");
-		return false;
+		is_ok = false;
 	}

-	return true;
+	*index = array_index_nospec(*index, net->mpls.platform_labels);
+	return is_ok;
 }

 static int mpls_route_add(struct mpls_route_config *cfg,
@@ -975,7 +979,7 @@ static int mpls_route_add(struct mpls_route_config *cfg,
 		index = find_free_label(net);
 	}

-	if (!mpls_label_ok(net, index, extack))
+	if (!mpls_label_ok(net, &index, extack))
 		goto errout;

 	/* Append makes no sense with mpls */
@@ -1052,7 +1056,7 @@ static int mpls_route_del(struct mpls_route_config *cfg,

 	index = cfg->rc_label;

-	if (!mpls_label_ok(net, index, extack))
+	if (!mpls_label_ok(net, &index, extack))
 		goto errout;

 	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
@@ -1810,7 +1814,7 @@ static int rtm_to_route_config(struct sk_buff *skb,
 			goto errout;

 		if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
-				   cfg->rc_label, extack))
+				   &cfg->rc_label, extack))
 			goto errout;
 		break;
 	}
@@ -2137,7 +2141,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		goto errout;
 	}

-	if (!mpls_label_ok(net, in_label, extack)) {
+	if (!mpls_label_ok(net, &in_label, extack)) {
 		err = -EINVAL;
 		goto errout;
 	}
@@ -2510,12 +2514,15 @@ static int __init mpls_init(void)

 	rtnl_af_register(&mpls_af_ops);

-	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
-	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
-	rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
-		      0);
-	rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
-		      mpls_netconf_dump_devconf, 0);
+	rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE,
+			     mpls_rtm_newroute, NULL, 0);
+	rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE,
+			     mpls_rtm_delroute, NULL, 0);
+	rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE,
+			     mpls_getroute, mpls_dump_routes, 0);
+	rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
+			     mpls_netconf_get_devconf,
+			     mpls_netconf_dump_devconf, 0);
 	err = ipgre_tunnel_encap_add_mpls_ops();
 	if (err)
 		pr_err("Can't add mpls over gre tunnel ops\n");
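mpls_label_ok() now takes the label index by pointer so it can clamp it with array_index_nospec() after the bounds check — the standard Spectre-v1 mitigation against speculative out-of-bounds reads. A userspace rendering of the branchless clamp, mirroring the kernel's generic array_index_mask_nospec() and assuming arithmetic right shift of signed longs:

#include <stdio.h>

static unsigned long index_nospec(unsigned long idx, unsigned long size)
{
	/* all-ones mask when idx < size, zero otherwise; no branch involved */
	unsigned long mask =
		~(long)(idx | (size - 1 - idx)) >> (sizeof(long) * 8 - 1);

	return idx & mask;
}

int main(void)
{
	printf("%lu\n", index_nospec(3, 8));	/* 3: in range, unchanged */
	printf("%lu\n", index_nospec(9, 8));	/* 0: out of range, clamped */
	return 0;
}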
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 67e708e98ccf..e7b05de1e6d1 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -143,43 +143,14 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
 	if (!nc)
 		return -ENODEV;

-	/* If the channel is active one, we need reconfigure it */
 	spin_lock_irqsave(&nc->lock, flags);
 	ncm = &nc->modes[NCSI_MODE_LINK];
 	hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
 	ncm->data[3] = ntohl(hncdsc->status);
-	netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
-		    nc->id, ncm->data[3] & 0x3 ? "up" : "down");
-	if (!list_empty(&nc->link) ||
-	    nc->state != NCSI_CHANNEL_ACTIVE) {
-		spin_unlock_irqrestore(&nc->lock, flags);
-		return 0;
-	}
-
-	spin_unlock_irqrestore(&nc->lock, flags);
-	if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1))
-		ndp->flags |= NCSI_DEV_RESHUFFLE;
-
-	/* If this channel is the active one and the link doesn't
-	 * work, we have to choose another channel to be active one.
-	 * The logic here is exactly similar to what we do when link
-	 * is down on the active channel.
-	 *
-	 * On the other hand, we need configure it when host driver
-	 * state on the active channel becomes ready.
-	 */
-	ncsi_stop_channel_monitor(nc);
-
-	spin_lock_irqsave(&nc->lock, flags);
-	nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE :
-					   NCSI_CHANNEL_ACTIVE;
 	spin_unlock_irqrestore(&nc->lock, flags);
-
-	spin_lock_irqsave(&ndp->lock, flags);
-	list_add_tail_rcu(&nc->link, &ndp->channel_queue);
-	spin_unlock_irqrestore(&ndp->lock, flags);
-
-	ncsi_process_next_channel(ndp);
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "NCSI: host driver %srunning on channel %u\n",
+		      ncm->data[3] & 0x1 ? "" : "not ", nc->id);

 	return 0;
 }
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e4a13cc8a2e7..d3220b43c832 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -12,6 +12,12 @@ config NETFILTER_INGRESS
 config NETFILTER_NETLINK
 	tristate

+config NETFILTER_FAMILY_BRIDGE
+	bool
+
+config NETFILTER_FAMILY_ARP
+	bool
+
 config NETFILTER_NETLINK_ACCT
 	tristate "Netfilter NFACCT over NFNETLINK interface"
 	depends on NETFILTER_ADVANCED
@@ -62,6 +68,8 @@ config NF_LOG_NETDEV
 	select NF_LOG_COMMON

 if NF_CONNTRACK
+config NETFILTER_CONNCOUNT
+	tristate

 config NF_CONNTRACK_MARK
 	bool 'Connection mark tracking support'
@@ -497,6 +505,13 @@ config NFT_CT
 	  This option adds the "ct" expression that you can use to match
 	  connection tracking information such as the flow state.

+config NFT_FLOW_OFFLOAD
+	depends on NF_CONNTRACK && NF_FLOW_TABLE
+	tristate "Netfilter nf_tables hardware flow offload module"
+	help
+	  This option adds the "flow_offload" expression that you can use to
+	  choose what flows are placed into the hardware.
+
 config NFT_SET_RBTREE
 	tristate "Netfilter nf_tables rbtree set module"
 	help
@@ -649,6 +664,25 @@ endif # NF_TABLES_NETDEV

 endif # NF_TABLES

+config NF_FLOW_TABLE_INET
+	tristate "Netfilter flow table mixed IPv4/IPv6 module"
+	depends on NF_FLOW_TABLE_IPV4
+	depends on NF_FLOW_TABLE_IPV6
+	help
+	  This option adds the flow table mixed IPv4/IPv6 support.
+
+	  To compile it as a module, choose M here.
+
+config NF_FLOW_TABLE
+	tristate "Netfilter flow table module"
+	depends on NETFILTER_INGRESS
+	depends on NF_CONNTRACK
+	depends on NF_TABLES
+	help
+	  This option adds the flow table core infrastructure.
+
+	  To compile it as a module, choose M here.
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
@@ -1120,6 +1154,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
 	tristate '"connlimit" match support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
+	select NETFILTER_CONNCOUNT
 	---help---
 	  This match allows you to match against the number of parallel
 	  connections to a server per client IP address (or address block).
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f78ed2470831..5d9b8b959e58 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o

 nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
@@ -67,6 +67,8 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 # SYNPROXY
 obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o

+obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o
+
 # generic packet duplication from netdev family
 obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o

@@ -84,6 +86,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
 obj-$(CONFIG_NFT_RT) += nft_rt.o
 obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
 obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
 obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
 obj-$(CONFIG_NFT_NAT) += nft_nat.o
 obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
@@ -107,6 +110,10 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
 obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
 obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o

+# flow table infrastructure
+obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+
 # generic X tables
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 52cd2901a097..0f6b8172fb9a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -4,8 +4,7 @@
  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
  * way.
  *
- * Rusty Russell (C)2000 -- This code is GPL.
- * Patrick McHardy (c) 2006-2012
+ * This code is GPL.
  */
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
@@ -28,34 +27,12 @@

 #include "nf_internals.h"

-static DEFINE_MUTEX(afinfo_mutex);
-
-const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL(nf_afinfo);
 const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ipv6_ops);

 DEFINE_PER_CPU(bool, nf_skb_duplicated);
 EXPORT_SYMBOL_GPL(nf_skb_duplicated);

-int nf_register_afinfo(const struct nf_afinfo *afinfo)
-{
-	mutex_lock(&afinfo_mutex);
-	RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
-	mutex_unlock(&afinfo_mutex);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_register_afinfo);
-
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
-{
-	mutex_lock(&afinfo_mutex);
-	RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
-	mutex_unlock(&afinfo_mutex);
-	synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-
 #ifdef HAVE_JUMP_LABEL
 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);
@@ -74,7 +51,8 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 	struct nf_hook_entries *e;
 	size_t alloc = sizeof(*e) +
 		       sizeof(struct nf_hook_entry) * num +
-		       sizeof(struct nf_hook_ops *) * num;
+		       sizeof(struct nf_hook_ops *) * num +
+		       sizeof(struct nf_hook_entries_rcu_head);

 	if (num == 0)
 		return NULL;
@@ -85,6 +63,30 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 	return e;
 }

+static void __nf_hook_entries_free(struct rcu_head *h)
+{
+	struct nf_hook_entries_rcu_head *head;
+
+	head = container_of(h, struct nf_hook_entries_rcu_head, head);
+	kvfree(head->allocation);
+}
+
+static void nf_hook_entries_free(struct nf_hook_entries *e)
+{
+	struct nf_hook_entries_rcu_head *head;
+	struct nf_hook_ops **ops;
+	unsigned int num;
+
+	if (!e)
+		return;
+
+	num = e->num_hook_entries;
+	ops = nf_hook_entries_get_hook_ops(e);
+	head = (void *)&ops[num];
+	head->allocation = e;
+	call_rcu(&head->head, __nf_hook_entries_free);
+}
+
 static unsigned int accept_all(void *priv,
 			       struct sk_buff *skb,
 			       const struct nf_hook_state *state)
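The new nf_hook_entries_free() relies on the allocation layout set up above: a small rcu_head trailer is placed after the ops pointers, so the whole blob can be handed to call_rcu() instead of paying a synchronous synchronize_net() on every unregister. A userspace sketch of that trailer trick; the RCU machinery is stubbed out with a direct callback and the entry array is simplified to plain pointers:

#include <stdio.h>
#include <stdlib.h>

struct rcu_head { void (*func)(struct rcu_head *); };

struct nf_hook_entries_rcu_head {
	struct rcu_head head;
	void *allocation;	/* points back at the whole blob */
};

struct nf_hook_entries {
	unsigned int num_hook_entries;
	void *hooks[];		/* num hooks, then num ops, then trailer */
};

static struct nf_hook_entries *alloc_entries(unsigned int num)
{
	size_t sz = sizeof(struct nf_hook_entries) +
		    sizeof(void *) * num * 2 +	/* hook slots + ops pointers */
		    sizeof(struct nf_hook_entries_rcu_head);
	struct nf_hook_entries *e = calloc(1, sz);

	if (e)
		e->num_hook_entries = num;
	return e;
}

static void free_cb(struct rcu_head *h)
{
	struct nf_hook_entries_rcu_head *head =
		(struct nf_hook_entries_rcu_head *)h;	/* head is first member */

	free(head->allocation);		/* stands in for kvfree() */
}

/* stands in for call_rcu(): invokes the callback immediately */
static void call_rcu_stub(struct rcu_head *h, void (*f)(struct rcu_head *))
{
	f(h);
}

static void entries_free(struct nf_hook_entries *e)
{
	void **ops = &e->hooks[e->num_hook_entries];	/* ops follow hooks */
	struct nf_hook_entries_rcu_head *head =
		(struct nf_hook_entries_rcu_head *)&ops[e->num_hook_entries];

	head->allocation = e;
	call_rcu_stub(&head->head, free_cb);
}

int main(void)
{
	struct nf_hook_entries *e = alloc_entries(4);

	entries_free(e);
	puts("freed via trailer callback");
	return 0;
}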
@@ -135,6 +137,12 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
 			++i;
 			continue;
 		}
+
+		if (reg->nat_hook && orig_ops[i]->nat_hook) {
+			kvfree(new);
+			return ERR_PTR(-EBUSY);
+		}
+
 		if (inserted || reg->priority > orig_ops[i]->priority) {
 			new_ops[nhooks] = (void *)orig_ops[i];
 			new->hooks[nhooks] = old->hooks[i];
@@ -237,27 +245,61 @@ out_assign:
 	return old;
 }

-static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries __rcu **
+nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
+		   struct net_device *dev)
 {
-	if (reg->pf != NFPROTO_NETDEV)
-		return net->nf.hooks[reg->pf]+reg->hooknum;
+	switch (pf) {
+	case NFPROTO_NETDEV:
+		break;
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+	case NFPROTO_ARP:
+		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
+			return NULL;
+		return net->nf.hooks_arp + hooknum;
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+	case NFPROTO_BRIDGE:
+		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
+			return NULL;
+		return net->nf.hooks_bridge + hooknum;
+#endif
+	case NFPROTO_IPV4:
+		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
+			return NULL;
+		return net->nf.hooks_ipv4 + hooknum;
+	case NFPROTO_IPV6:
+		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
+			return NULL;
+		return net->nf.hooks_ipv6 + hooknum;
+#if IS_ENABLED(CONFIG_DECNET)
+	case NFPROTO_DECNET:
+		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
+			return NULL;
+		return net->nf.hooks_decnet + hooknum;
+#endif
+	default:
+		WARN_ON_ONCE(1);
+		return NULL;
+	}

 #ifdef CONFIG_NETFILTER_INGRESS
-	if (reg->hooknum == NF_NETDEV_INGRESS) {
-		if (reg->dev && dev_net(reg->dev) == net)
-			return &reg->dev->nf_hooks_ingress;
+	if (hooknum == NF_NETDEV_INGRESS) {
+		if (dev && dev_net(dev) == net)
+			return &dev->nf_hooks_ingress;
 	}
 #endif
 	WARN_ON_ONCE(1);
 	return NULL;
 }

-int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static int __nf_register_net_hook(struct net *net, int pf,
+				  const struct nf_hook_ops *reg)
 {
 	struct nf_hook_entries *p, *new_hooks;
 	struct nf_hook_entries __rcu **pp;

-	if (reg->pf == NFPROTO_NETDEV) {
+	if (pf == NFPROTO_NETDEV) {
 #ifndef CONFIG_NETFILTER_INGRESS
 		if (reg->hooknum == NF_NETDEV_INGRESS)
 			return -EOPNOTSUPP;
@@ -267,7 +309,7 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 			return -EINVAL;
 	}

-	pp = nf_hook_entry_head(net, reg);
+	pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
 	if (!pp)
 		return -EINVAL;

@@ -285,21 +327,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)

 	hooks_validate(new_hooks);
 #ifdef CONFIG_NETFILTER_INGRESS
-	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+	if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 		net_inc_ingress_queue();
 #endif
 #ifdef HAVE_JUMP_LABEL
-	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+	static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
 #endif
-	synchronize_net();
 	BUG_ON(p == new_hooks);
-	kvfree(p);
+	nf_hook_entries_free(p);
 	return 0;
 }
-EXPORT_SYMBOL(nf_register_net_hook);

 /*
- * __nf_unregister_net_hook - remove a hook from blob
+ * nf_remove_net_hook - remove a hook from blob
  *
  * @oldp: current address of hook blob
  * @unreg: hook to unregister
@@ -307,8 +347,8 @@ EXPORT_SYMBOL(nf_register_net_hook);
  * This cannot fail, hook unregistration must always succeed.
  * Therefore replace the to-be-removed hook with a dummy hook.
  */
-static void __nf_unregister_net_hook(struct nf_hook_entries *old,
-				     const struct nf_hook_ops *unreg)
+static void nf_remove_net_hook(struct nf_hook_entries *old,
+			       const struct nf_hook_ops *unreg, int pf)
 {
 	struct nf_hook_ops **orig_ops;
 	bool found = false;
@@ -326,24 +366,24 @@ static void __nf_unregister_net_hook(struct nf_hook_entries *old,

 	if (found) {
 #ifdef CONFIG_NETFILTER_INGRESS
-		if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+		if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
 			net_dec_ingress_queue();
 #endif
 #ifdef HAVE_JUMP_LABEL
-		static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
+		static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
 #endif
 	} else {
-		WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+		WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
 	}
 }

-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static void __nf_unregister_net_hook(struct net *net, int pf,
+				     const struct nf_hook_ops *reg)
 {
 	struct nf_hook_entries __rcu **pp;
 	struct nf_hook_entries *p;
-	unsigned int nfq;

-	pp = nf_hook_entry_head(net, reg);
+	pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
 	if (!pp)
 		return;

@@ -355,23 +395,52 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 		return;
 	}

-	__nf_unregister_net_hook(p, reg);
+	nf_remove_net_hook(p, reg, pf);

 	p = __nf_hook_entries_try_shrink(pp);
 	mutex_unlock(&nf_hook_mutex);
 	if (!p)
 		return;

-	synchronize_net();
+	nf_queue_nf_hook_drop(net);
+	nf_hook_entries_free(p);
+}

-	/* other cpu might still process nfqueue verdict that used reg */
-	nfq = nf_queue_nf_hook_drop(net);
-	if (nfq)
-		synchronize_net();
-	kvfree(p);
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+	if (reg->pf == NFPROTO_INET) {
+		__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+		__nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
+	} else {
+		__nf_unregister_net_hook(net, reg->pf, reg);
+	}
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);

+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+	int err;
+
+	if (reg->pf == NFPROTO_INET) {
+		err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
+		if (err < 0)
+			return err;
+
+		err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
+		if (err < 0) {
+			__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+			return err;
+		}
+	} else {
+		err = __nf_register_net_hook(net, reg->pf, reg);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nf_register_net_hook);
+
 int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			  unsigned int n)
 {
@@ -395,63 +464,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			     unsigned int hookcount)
 {
-	struct nf_hook_entries *to_free[16], *p;
-	struct nf_hook_entries __rcu **pp;
-	unsigned int i, j, n;
-
-	mutex_lock(&nf_hook_mutex);
-	for (i = 0; i < hookcount; i++) {
-		pp = nf_hook_entry_head(net, &reg[i]);
-		if (!pp)
-			continue;
-
-		p = nf_entry_dereference(*pp);
-		if (WARN_ON_ONCE(!p))
-			continue;
-		__nf_unregister_net_hook(p, &reg[i]);
-	}
-	mutex_unlock(&nf_hook_mutex);
-
-	do {
-		n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
-		mutex_lock(&nf_hook_mutex);
-
-		for (i = 0, j = 0; i < hookcount && j < n; i++) {
-			pp = nf_hook_entry_head(net, &reg[i]);
-			if (!pp)
-				continue;
-
-			p = nf_entry_dereference(*pp);
-			if (!p)
-				continue;
-
-			to_free[j] = __nf_hook_entries_try_shrink(pp);
-			if (to_free[j])
-				++j;
-		}
-
-		mutex_unlock(&nf_hook_mutex);
-
-		if (j) {
-			unsigned int nfq;
-
-			synchronize_net();
-
-			/* need 2nd synchronize_net() if nfqueue is used, skb
-			 * can get reinjected right before nf_queue_hook_drop()
-			 */
-			nfq = nf_queue_nf_hook_drop(net);
-			if (nfq)
-				synchronize_net();
-
-			for (i = 0; i < j; i++)
-				kvfree(to_free[i]);
-		}
+	unsigned int i;

-		reg += n;
-		hookcount -= n;
-	} while (hookcount > 0);
+	for (i = 0; i < hookcount; i++)
+		nf_unregister_net_hook(net, &reg[i]);
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);

@@ -569,14 +585,27 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(nf_nat_decode_session_hook);
 #endif

-static int __net_init netfilter_net_init(struct net *net)
+static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
 {
-	int i, h;
+	int h;

-	for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
-		for (h = 0; h < NF_MAX_HOOKS; h++)
-			RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
-	}
+	for (h = 0; h < max; h++)
+		RCU_INIT_POINTER(e[h], NULL);
+}
+
+static int __net_init netfilter_net_init(struct net *net)
+{
+	__netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
+	__netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+	__netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
602#endif
603#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
604 __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
605#endif
606#if IS_ENABLED(CONFIG_DECNET)
607 __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
608#endif
580 609
581#ifdef CONFIG_PROC_FS 610#ifdef CONFIG_PROC_FS
582 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", 611 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
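
The NFPROTO_INET handling added above lets a single nf_hook_ops cover both address families: registration fans out to the IPv4 and IPv6 hook arrays and rolls the IPv4 side back if the IPv6 side fails, and unregistration fans out the same way. A minimal caller sketch (hypothetical module; assumes only the standard netfilter hook API shown in this diff):

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>	/* NF_IP_PRI_FIRST */
#include <linux/skbuff.h>
#include <net/net_namespace.h>

static unsigned int demo_hook(void *priv, struct sk_buff *skb,
			      const struct nf_hook_state *state)
{
	/* Invoked for both IPv4 and IPv6 traffic once registered. */
	return NF_ACCEPT;
}

static struct nf_hook_ops demo_ops = {
	.hook		= demo_hook,
	.pf		= NFPROTO_INET,	/* expanded to IPV4 + IPV6 internally */
	.hooknum	= NF_INET_LOCAL_IN,
	.priority	= NF_IP_PRI_FIRST,
};

static int __init demo_init(void)
{
	/* One call, two registrations; on failure neither family
	 * is left registered.
	 */
	return nf_register_net_hook(&init_net, &demo_ops);
}

static void __exit demo_exit(void)
{
	nf_unregister_net_hook(&init_net, &demo_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
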
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 5ca18f07683b..257ca393e6f2 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -127,14 +127,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
127 127
128 if (ret <= 0) 128 if (ret <= 0)
129 return ret; 129 return ret;
130 if (SET_WITH_TIMEOUT(set) && 130 return ip_set_match_extensions(set, ext, mext, flags, x);
131 ip_set_timeout_expired(ext_timeout(x, set)))
132 return 0;
133 if (SET_WITH_COUNTER(set))
134 ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
135 if (SET_WITH_SKBINFO(set))
136 ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
137 return 1;
138} 131}
139 132
140static int 133static int
@@ -227,6 +220,7 @@ mtype_list(const struct ip_set *set,
227 rcu_read_lock(); 220 rcu_read_lock();
228 for (; cb->args[IPSET_CB_ARG0] < map->elements; 221 for (; cb->args[IPSET_CB_ARG0] < map->elements;
229 cb->args[IPSET_CB_ARG0]++) { 222 cb->args[IPSET_CB_ARG0]++) {
223 cond_resched_rcu();
230 id = cb->args[IPSET_CB_ARG0]; 224 id = cb->args[IPSET_CB_ARG0];
231 x = get_ext(set, map, id); 225 x = get_ext(set, map, id);
232 if (!test_bit(id, map->members) || 226 if (!test_bit(id, map->members) ||
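
The cond_resched_rcu() added to the mtype_list() dump loop (here and in ip_set_hash_gen.h below) bounds how long a large set dump can hold a CPU inside the RCU read section. The pattern only works because no pointer is carried across the call; a sketch under that assumption:

#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Walk a big RCU-protected table without monopolizing the CPU.
 * On configurations that need it, cond_resched_rcu() expands to
 * rcu_read_unlock(); cond_resched(); rcu_read_lock(); so nothing
 * fetched before the call may be dereferenced after it -- only
 * the integer cursor survives.
 */
static void demo_dump(unsigned long nr_buckets)
{
	unsigned long i;

	rcu_read_lock();
	for (i = 0; i < nr_buckets; i++) {
		cond_resched_rcu();
		/* re-look-up bucket i under RCU and emit it ... */
	}
	rcu_read_unlock();
}
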
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index d8975a0b4282..488d6d05c65c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -263,12 +263,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
263 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); 263 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
264 if (ret) 264 if (ret)
265 return ret; 265 return ret;
266 if (first_ip > last_ip) { 266 if (first_ip > last_ip)
267 u32 tmp = first_ip; 267 swap(first_ip, last_ip);
268
269 first_ip = last_ip;
270 last_ip = tmp;
271 }
272 } else if (tb[IPSET_ATTR_CIDR]) { 268 } else if (tb[IPSET_ATTR_CIDR]) {
273 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 269 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
274 270
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 4c279fbd2d5d..c00b6a2e8e3c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -337,12 +337,8 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
337 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); 337 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
338 if (ret) 338 if (ret)
339 return ret; 339 return ret;
340 if (first_ip > last_ip) { 340 if (first_ip > last_ip)
341 u32 tmp = first_ip; 341 swap(first_ip, last_ip);
342
343 first_ip = last_ip;
344 last_ip = tmp;
345 }
346 } else if (tb[IPSET_ATTR_CIDR]) { 342 } else if (tb[IPSET_ATTR_CIDR]) {
347 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 343 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
348 344
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f9bbd7c98b5..b561ca8b3659 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -238,12 +238,8 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
238 238
239 first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); 239 first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
240 last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); 240 last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
241 if (first_port > last_port) { 241 if (first_port > last_port)
242 u16 tmp = first_port; 242 swap(first_port, last_port);
243
244 first_port = last_port;
245 last_port = tmp;
246 }
247 243
248 elements = last_port - first_port + 1; 244 elements = last_port - first_port + 1;
249 set->dsize = ip_set_elem_len(set, tb, 0, 0); 245 set->dsize = ip_set_elem_len(set, tb, 0, 0);
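
The three bitmap creators above drop their open-coded three-line exchanges in favor of the type-generic swap() macro from <linux/kernel.h>, which supplies its own correctly typed temporary. Sketch:

#include <linux/kernel.h>	/* swap() */
#include <linux/types.h>

static void demo_order_range(u32 *first, u32 *last)
{
	/* swap() declares its own temporary of the right type,
	 * so u16 ports and u32 addresses use the same one-liner.
	 */
	if (*first > *last)
		swap(*first, *last);
}
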
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index cf84f7b37cd9..975a85a48d39 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -57,7 +57,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
57 57
58/* When the nfnl mutex is held: */ 58/* When the nfnl mutex is held: */
59#define ip_set_dereference(p) \ 59#define ip_set_dereference(p) \
60 rcu_dereference_protected(p, 1) 60 rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
61#define ip_set(inst, id) \ 61#define ip_set(inst, id) \
62 ip_set_dereference((inst)->ip_set_list)[id] 62 ip_set_dereference((inst)->ip_set_list)[id]
63 63
@@ -472,6 +472,31 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
472} 472}
473EXPORT_SYMBOL_GPL(ip_set_put_extensions); 473EXPORT_SYMBOL_GPL(ip_set_put_extensions);
474 474
475bool
476ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
477 struct ip_set_ext *mext, u32 flags, void *data)
478{
479 if (SET_WITH_TIMEOUT(set) &&
480 ip_set_timeout_expired(ext_timeout(data, set)))
481 return false;
482 if (SET_WITH_COUNTER(set)) {
483 struct ip_set_counter *counter = ext_counter(data, set);
484
485 if (flags & IPSET_FLAG_MATCH_COUNTERS &&
486 !(ip_set_match_counter(ip_set_get_packets(counter),
487 mext->packets, mext->packets_op) &&
488 ip_set_match_counter(ip_set_get_bytes(counter),
489 mext->bytes, mext->bytes_op)))
490 return false;
491 ip_set_update_counter(counter, ext, flags);
492 }
493 if (SET_WITH_SKBINFO(set))
494 ip_set_get_skbinfo(ext_skbinfo(data, set),
495 ext, mext, flags);
496 return true;
497}
498EXPORT_SYMBOL_GPL(ip_set_match_extensions);
499
475/* Creating/destroying/renaming/swapping affect the existence and 500/* Creating/destroying/renaming/swapping affect the existence and
476 * the properties of a set. All of these can be executed from userspace 501 * the properties of a set. All of these can be executed from userspace
477 * only and serialized by the nfnl mutex indirectly from nfnetlink. 502 * only and serialized by the nfnl mutex indirectly from nfnetlink.
@@ -1386,11 +1411,9 @@ dump_last:
1386 goto next_set; 1411 goto next_set;
1387 if (set->variant->uref) 1412 if (set->variant->uref)
1388 set->variant->uref(set, cb, true); 1413 set->variant->uref(set, cb, true);
1389 /* Fall through and add elements */ 1414 /* fall through */
1390 default: 1415 default:
1391 rcu_read_lock_bh();
1392 ret = set->variant->list(set, skb, cb); 1416 ret = set->variant->list(set, skb, cb);
1393 rcu_read_unlock_bh();
1394 if (!cb->args[IPSET_CB_ARG0]) 1417 if (!cb->args[IPSET_CB_ARG0])
1395 /* Set is done, proceed with next one */ 1418 /* Set is done, proceed with next one */
1396 goto next_set; 1419 goto next_set;
@@ -2055,6 +2078,7 @@ ip_set_net_exit(struct net *net)
2055 2078
2056 inst->is_deleted = true; /* flag for ip_set_nfnl_put */ 2079 inst->is_deleted = true; /* flag for ip_set_nfnl_put */
2057 2080
2081 nfnl_lock(NFNL_SUBSYS_IPSET);
2058 for (i = 0; i < inst->ip_set_max; i++) { 2082 for (i = 0; i < inst->ip_set_max; i++) {
2059 set = ip_set(inst, i); 2083 set = ip_set(inst, i);
2060 if (set) { 2084 if (set) {
@@ -2062,6 +2086,7 @@ ip_set_net_exit(struct net *net)
2062 ip_set_destroy_set(set); 2086 ip_set_destroy_set(set);
2063 } 2087 }
2064 } 2088 }
2089 nfnl_unlock(NFNL_SUBSYS_IPSET);
2065 kfree(rcu_dereference_protected(inst->ip_set_list, 1)); 2090 kfree(rcu_dereference_protected(inst->ip_set_list, 1));
2066} 2091}
2067 2092
@@ -2097,7 +2122,6 @@ ip_set_init(void)
2097 return ret; 2122 return ret;
2098 } 2123 }
2099 2124
2100 pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
2101 return 0; 2125 return 0;
2102} 2126}
2103 2127
@@ -2113,3 +2137,5 @@ ip_set_fini(void)
2113 2137
2114module_init(ip_set_init); 2138module_init(ip_set_init);
2115module_exit(ip_set_fini); 2139module_exit(ip_set_fini);
2140
2141MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));
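
ip_set_dereference() now encodes its locking contract instead of passing a bare 1: lockdep verifies the ipset nfnl mutex is actually held at every use, which is also why ip_set_net_exit() above grows an nfnl_lock()/nfnl_unlock() pair around set teardown. The same pattern for a hypothetical RCU-managed pointer guarded by that mutex:

#include <linux/rcupdate.h>
#include <linux/netfilter/nfnetlink.h>

struct demo_table;

struct demo_state {
	struct demo_table __rcu *table;
};

/* The condition documents *and* checks the rule: callers must
 * hold the NFNL_SUBSYS_IPSET mutex, and lockdep splats on
 * violations where a bare '1' would silently accept anything.
 */
#define demo_deref(p) \
	rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
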
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index efffc8eabafe..bbad940c0137 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -917,12 +917,9 @@ static inline int
917mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, 917mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
918 struct ip_set_ext *mext, struct ip_set *set, u32 flags) 918 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
919{ 919{
920 if (SET_WITH_COUNTER(set)) 920 if (!ip_set_match_extensions(set, ext, mext, flags, data))
921 ip_set_update_counter(ext_counter(data, set), 921 return 0;
922 ext, mext, flags); 922 /* nomatch entries return -ENOTEMPTY */
923 if (SET_WITH_SKBINFO(set))
924 ip_set_get_skbinfo(ext_skbinfo(data, set),
925 ext, mext, flags);
926 return mtype_do_data_match(data); 923 return mtype_do_data_match(data);
927} 924}
928 925
@@ -941,9 +938,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
941 struct mtype_elem *data; 938 struct mtype_elem *data;
942#if IPSET_NET_COUNT == 2 939#if IPSET_NET_COUNT == 2
943 struct mtype_elem orig = *d; 940 struct mtype_elem orig = *d;
944 int i, j = 0, k; 941 int ret, i, j = 0, k;
945#else 942#else
946 int i, j = 0; 943 int ret, i, j = 0;
947#endif 944#endif
948 u32 key, multi = 0; 945 u32 key, multi = 0;
949 946
@@ -969,18 +966,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
969 data = ahash_data(n, i, set->dsize); 966 data = ahash_data(n, i, set->dsize);
970 if (!mtype_data_equal(data, d, &multi)) 967 if (!mtype_data_equal(data, d, &multi))
971 continue; 968 continue;
972 if (SET_WITH_TIMEOUT(set)) { 969 ret = mtype_data_match(data, ext, mext, set, flags);
973 if (!ip_set_timeout_expired( 970 if (ret != 0)
974 ext_timeout(data, set))) 971 return ret;
975 return mtype_data_match(data, ext,
976 mext, set,
977 flags);
978#ifdef IP_SET_HASH_WITH_MULTI 972#ifdef IP_SET_HASH_WITH_MULTI
979 multi = 0; 973 /* No match, reset multiple match flag */
974 multi = 0;
980#endif 975#endif
981 } else
982 return mtype_data_match(data, ext,
983 mext, set, flags);
984 } 976 }
985#if IPSET_NET_COUNT == 2 977#if IPSET_NET_COUNT == 2
986 } 978 }
@@ -1027,12 +1019,11 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
1027 if (!test_bit(i, n->used)) 1019 if (!test_bit(i, n->used))
1028 continue; 1020 continue;
1029 data = ahash_data(n, i, set->dsize); 1021 data = ahash_data(n, i, set->dsize);
1030 if (mtype_data_equal(data, d, &multi) && 1022 if (!mtype_data_equal(data, d, &multi))
1031 !(SET_WITH_TIMEOUT(set) && 1023 continue;
1032 ip_set_timeout_expired(ext_timeout(data, set)))) { 1024 ret = mtype_data_match(data, ext, mext, set, flags);
1033 ret = mtype_data_match(data, ext, mext, set, flags); 1025 if (ret != 0)
1034 goto out; 1026 goto out;
1035 }
1036 } 1027 }
1037out: 1028out:
1038 return ret; 1029 return ret;
@@ -1143,6 +1134,7 @@ mtype_list(const struct ip_set *set,
1143 rcu_read_lock(); 1134 rcu_read_lock();
1144 for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); 1135 for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
1145 cb->args[IPSET_CB_ARG0]++) { 1136 cb->args[IPSET_CB_ARG0]++) {
1137 cond_resched_rcu();
1146 incomplete = skb_tail_pointer(skb); 1138 incomplete = skb_tail_pointer(skb);
1147 n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0])); 1139 n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
1148 pr_debug("cb->arg bucket: %lu, t %p n %p\n", 1140 pr_debug("cb->arg bucket: %lu, t %p n %p\n",
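
With ip_set_match_extensions() (added to ip_set_core.c above) owning the timeout, counter, and skbinfo handling, the per-type match paths collapse to "extensions pass, then ask the type", and any nonzero result is final for the caller. A condensed sketch of the resulting shape; the real mtype_data_match() returns mtype_do_data_match(data), i.e. 1, 0, or -ENOTEMPTY for "nomatch" entries:

#include <linux/netfilter/ipset/ip_set.h>

static int demo_data_match(struct ip_set *set, void *data,
			   const struct ip_set_ext *ext,
			   struct ip_set_ext *mext, u32 flags)
{
	/* One helper now covers timeout expiry, optional counter
	 * matching (IPSET_FLAG_MATCH_COUNTERS) and the counter /
	 * skbinfo updates that used to be open-coded per type.
	 */
	if (!ip_set_match_extensions(set, ext, mext, flags, data))
		return 0;	/* expired, or counters did not match */
	return 1;		/* stand-in for mtype_do_data_match(data) */
}
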
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 0f164e986bf1..88b83d6d3084 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -168,7 +168,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
168 struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; 168 struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
169 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 169 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
170 u32 ip = 0, ip_to = 0, p = 0, port, port_to; 170 u32 ip = 0, ip_to = 0, p = 0, port, port_to;
171 u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; 171 u32 ip2_from = 0, ip2_to = 0, ip2;
172 bool with_ports = false; 172 bool with_ports = false;
173 u8 cidr; 173 u8 cidr;
174 int ret; 174 int ret;
@@ -269,22 +269,21 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
269 ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); 269 ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
270 } 270 }
271 271
272 if (retried) 272 if (retried) {
273 ip = ntohl(h->next.ip); 273 ip = ntohl(h->next.ip);
274 p = ntohs(h->next.port);
275 ip2 = ntohl(h->next.ip2);
276 } else {
277 p = port;
278 ip2 = ip2_from;
279 }
274 for (; ip <= ip_to; ip++) { 280 for (; ip <= ip_to; ip++) {
275 e.ip = htonl(ip); 281 e.ip = htonl(ip);
276 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
277 : port;
278 for (; p <= port_to; p++) { 282 for (; p <= port_to; p++) {
279 e.port = htons(p); 283 e.port = htons(p);
280 ip2 = retried && 284 do {
281 ip == ntohl(h->next.ip) &&
282 p == ntohs(h->next.port)
283 ? ntohl(h->next.ip2) : ip2_from;
284 while (ip2 <= ip2_to) {
285 e.ip2 = htonl(ip2); 285 e.ip2 = htonl(ip2);
286 ip2_last = ip_set_range_to_cidr(ip2, ip2_to, 286 ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
287 &cidr);
288 e.cidr = cidr - 1; 287 e.cidr = cidr - 1;
289 ret = adtfn(set, &e, &ext, &ext, flags); 288 ret = adtfn(set, &e, &ext, &ext, flags);
290 289
@@ -292,9 +291,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
292 return ret; 291 return ret;
293 292
294 ret = 0; 293 ret = 0;
295 ip2 = ip2_last + 1; 294 } while (ip2++ < ip2_to);
296 } 295 ip2 = ip2_from;
297 } 296 }
297 p = port;
298 } 298 }
299 return ret; 299 return ret;
300} 300}
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1c67a1761e45..5449e23af13a 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -143,7 +143,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
143 ipset_adtfn adtfn = set->variant->adt[adt]; 143 ipset_adtfn adtfn = set->variant->adt[adt];
144 struct hash_net4_elem e = { .cidr = HOST_MASK }; 144 struct hash_net4_elem e = { .cidr = HOST_MASK };
145 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 145 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
146 u32 ip = 0, ip_to = 0, last; 146 u32 ip = 0, ip_to = 0;
147 int ret; 147 int ret;
148 148
149 if (tb[IPSET_ATTR_LINENO]) 149 if (tb[IPSET_ATTR_LINENO])
@@ -193,16 +193,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
193 } 193 }
194 if (retried) 194 if (retried)
195 ip = ntohl(h->next.ip); 195 ip = ntohl(h->next.ip);
196 while (ip <= ip_to) { 196 do {
197 e.ip = htonl(ip); 197 e.ip = htonl(ip);
198 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); 198 ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
199 ret = adtfn(set, &e, &ext, &ext, flags); 199 ret = adtfn(set, &e, &ext, &ext, flags);
200 if (ret && !ip_set_eexist(ret, flags)) 200 if (ret && !ip_set_eexist(ret, flags))
201 return ret; 201 return ret;
202 202
203 ret = 0; 203 ret = 0;
204 ip = last + 1; 204 } while (ip++ < ip_to);
205 }
206 return ret; 205 return ret;
207} 206}
208 207
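
All of these hash:* loop rewrites share one idiom: ip_set_range_to_cidr() reports the last address the emitted block covers, and the "} while (ip++ < ip_to);" test compares before incrementing. That terminates cleanly even when ip_to is 0xffffffff, where the old "ip = last + 1; while (ip <= ip_to)" form wrapped ip back to 0 and never exited. A self-contained sketch of the shape (demo_largest_block() and demo_emit() are hypothetical stand-ins):

#include <linux/types.h>

static void demo_walk_range(u32 from, u32 to,
			    u32 (*demo_largest_block)(u32 cur, u32 to, u8 *cidr),
			    void (*demo_emit)(u32 start, u8 cidr))
{
	u32 ip = from;
	u8 cidr;

	do {
		u32 start = ip;

		/* Returns the LAST address covered by the largest
		 * CIDR block that starts at 'start' and stays
		 * within [from, to].
		 */
		ip = demo_largest_block(ip, to, &cidr);
		demo_emit(start, cidr);
	} while (ip++ < to);	/* compare first, then step: no u32
				 * wraparound even when to == ~0U */
}
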
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index d417074f1c1a..f5164c1efce2 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -200,7 +200,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
200 ipset_adtfn adtfn = set->variant->adt[adt]; 200 ipset_adtfn adtfn = set->variant->adt[adt];
201 struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; 201 struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
202 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 202 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
203 u32 ip = 0, ip_to = 0, last; 203 u32 ip = 0, ip_to = 0;
204 int ret; 204 int ret;
205 205
206 if (tb[IPSET_ATTR_LINENO]) 206 if (tb[IPSET_ATTR_LINENO])
@@ -255,17 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
255 255
256 if (retried) 256 if (retried)
257 ip = ntohl(h->next.ip); 257 ip = ntohl(h->next.ip);
258 while (ip <= ip_to) { 258 do {
259 e.ip = htonl(ip); 259 e.ip = htonl(ip);
260 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); 260 ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
261 ret = adtfn(set, &e, &ext, &ext, flags); 261 ret = adtfn(set, &e, &ext, &ext, flags);
262 262
263 if (ret && !ip_set_eexist(ret, flags)) 263 if (ret && !ip_set_eexist(ret, flags))
264 return ret; 264 return ret;
265 265
266 ret = 0; 266 ret = 0;
267 ip = last + 1; 267 } while (ip++ < ip_to);
268 }
269 return ret; 268 return ret;
270} 269}
271 270
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 7f9ae2e9645b..5a2b923bd81f 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -169,8 +169,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
169 ipset_adtfn adtfn = set->variant->adt[adt]; 169 ipset_adtfn adtfn = set->variant->adt[adt];
170 struct hash_netnet4_elem e = { }; 170 struct hash_netnet4_elem e = { };
171 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 171 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
172 u32 ip = 0, ip_to = 0, last; 172 u32 ip = 0, ip_to = 0;
173 u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; 173 u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
174 int ret; 174 int ret;
175 175
176 if (tb[IPSET_ATTR_LINENO]) 176 if (tb[IPSET_ATTR_LINENO])
@@ -247,27 +247,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
247 ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); 247 ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
248 } 248 }
249 249
250 if (retried) 250 if (retried) {
251 ip = ntohl(h->next.ip[0]); 251 ip = ntohl(h->next.ip[0]);
252 ip2 = ntohl(h->next.ip[1]);
253 } else {
254 ip2 = ip2_from;
255 }
252 256
253 while (ip <= ip_to) { 257 do {
254 e.ip[0] = htonl(ip); 258 e.ip[0] = htonl(ip);
255 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); 259 ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
256 ip2 = (retried && 260 do {
257 ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
258 : ip2_from;
259 while (ip2 <= ip2_to) {
260 e.ip[1] = htonl(ip2); 261 e.ip[1] = htonl(ip2);
261 last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); 262 ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
262 ret = adtfn(set, &e, &ext, &ext, flags); 263 ret = adtfn(set, &e, &ext, &ext, flags);
263 if (ret && !ip_set_eexist(ret, flags)) 264 if (ret && !ip_set_eexist(ret, flags))
264 return ret; 265 return ret;
265 266
266 ret = 0; 267 ret = 0;
267 ip2 = last2 + 1; 268 } while (ip2++ < ip2_to);
268 } 269 ip2 = ip2_from;
269 ip = last + 1; 270 } while (ip++ < ip_to);
270 }
271 return ret; 271 return ret;
272} 272}
273 273
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index e6ef382febe4..1a187be9ebc8 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -161,7 +161,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
161 ipset_adtfn adtfn = set->variant->adt[adt]; 161 ipset_adtfn adtfn = set->variant->adt[adt];
162 struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; 162 struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
163 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 163 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
164 u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; 164 u32 port, port_to, p = 0, ip = 0, ip_to = 0;
165 bool with_ports = false; 165 bool with_ports = false;
166 u8 cidr; 166 u8 cidr;
167 int ret; 167 int ret;
@@ -239,25 +239,26 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
239 ip_set_mask_from_to(ip, ip_to, e.cidr + 1); 239 ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
240 } 240 }
241 241
242 if (retried) 242 if (retried) {
243 ip = ntohl(h->next.ip); 243 ip = ntohl(h->next.ip);
244 while (ip <= ip_to) { 244 p = ntohs(h->next.port);
245 } else {
246 p = port;
247 }
248 do {
245 e.ip = htonl(ip); 249 e.ip = htonl(ip);
246 last = ip_set_range_to_cidr(ip, ip_to, &cidr); 250 ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
247 e.cidr = cidr - 1; 251 e.cidr = cidr - 1;
248 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
249 : port;
250 for (; p <= port_to; p++) { 252 for (; p <= port_to; p++) {
251 e.port = htons(p); 253 e.port = htons(p);
252 ret = adtfn(set, &e, &ext, &ext, flags); 254 ret = adtfn(set, &e, &ext, &ext, flags);
253
254 if (ret && !ip_set_eexist(ret, flags)) 255 if (ret && !ip_set_eexist(ret, flags))
255 return ret; 256 return ret;
256 257
257 ret = 0; 258 ret = 0;
258 } 259 }
259 ip = last + 1; 260 p = port;
260 } 261 } while (ip++ < ip_to);
261 return ret; 262 return ret;
262} 263}
263 264
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 8602f2595a1a..d391485a6acd 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -184,8 +184,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
184 ipset_adtfn adtfn = set->variant->adt[adt]; 184 ipset_adtfn adtfn = set->variant->adt[adt];
185 struct hash_netportnet4_elem e = { }; 185 struct hash_netportnet4_elem e = { };
186 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 186 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
187 u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; 187 u32 ip = 0, ip_to = 0, p = 0, port, port_to;
188 u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; 188 u32 ip2_from = 0, ip2_to = 0, ip2;
189 bool with_ports = false; 189 bool with_ports = false;
190 int ret; 190 int ret;
191 191
@@ -288,33 +288,34 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
288 ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); 288 ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
289 } 289 }
290 290
291 if (retried) 291 if (retried) {
292 ip = ntohl(h->next.ip[0]); 292 ip = ntohl(h->next.ip[0]);
293 p = ntohs(h->next.port);
294 ip2 = ntohl(h->next.ip[1]);
295 } else {
296 p = port;
297 ip2 = ip2_from;
298 }
293 299
294 while (ip <= ip_to) { 300 do {
295 e.ip[0] = htonl(ip); 301 e.ip[0] = htonl(ip);
296 ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); 302 ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
297 p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
298 : port;
299 for (; p <= port_to; p++) { 303 for (; p <= port_to; p++) {
300 e.port = htons(p); 304 e.port = htons(p);
301 ip2 = (retried && ip == ntohl(h->next.ip[0]) && 305 do {
302 p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
303 : ip2_from;
304 while (ip2 <= ip2_to) {
305 e.ip[1] = htonl(ip2); 306 e.ip[1] = htonl(ip2);
306 ip2_last = ip_set_range_to_cidr(ip2, ip2_to, 307 ip2 = ip_set_range_to_cidr(ip2, ip2_to,
307 &e.cidr[1]); 308 &e.cidr[1]);
308 ret = adtfn(set, &e, &ext, &ext, flags); 309 ret = adtfn(set, &e, &ext, &ext, flags);
309 if (ret && !ip_set_eexist(ret, flags)) 310 if (ret && !ip_set_eexist(ret, flags))
310 return ret; 311 return ret;
311 312
312 ret = 0; 313 ret = 0;
313 ip2 = ip2_last + 1; 314 } while (ip2++ < ip2_to);
314 } 315 ip2 = ip2_from;
315 } 316 }
316 ip = ip_last + 1; 317 p = port;
317 } 318 } while (ip++ < ip_to);
318 return ret; 319 return ret;
319} 320}
320 321
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e864681b8dc5..072a658fde04 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -55,8 +55,9 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
55 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 55 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
56{ 56{
57 struct list_set *map = set->data; 57 struct list_set *map = set->data;
58 struct ip_set_ext *mext = &opt->ext;
58 struct set_elem *e; 59 struct set_elem *e;
59 u32 cmdflags = opt->cmdflags; 60 u32 flags = opt->cmdflags;
60 int ret; 61 int ret;
61 62
62 /* Don't lookup sub-counters at all */ 63 /* Don't lookup sub-counters at all */
@@ -64,21 +65,11 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
64 if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) 65 if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
65 opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; 66 opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
66 list_for_each_entry_rcu(e, &map->members, list) { 67 list_for_each_entry_rcu(e, &map->members, list) {
67 if (SET_WITH_TIMEOUT(set) &&
68 ip_set_timeout_expired(ext_timeout(e, set)))
69 continue;
70 ret = ip_set_test(e->id, skb, par, opt); 68 ret = ip_set_test(e->id, skb, par, opt);
71 if (ret > 0) { 69 if (ret <= 0)
72 if (SET_WITH_COUNTER(set)) 70 continue;
73 ip_set_update_counter(ext_counter(e, set), 71 if (ip_set_match_extensions(set, ext, mext, flags, e))
74 ext, &opt->ext, 72 return 1;
75 cmdflags);
76 if (SET_WITH_SKBINFO(set))
77 ip_set_get_skbinfo(ext_skbinfo(e, set),
78 ext, &opt->ext,
79 cmdflags);
80 return ret;
81 }
82 } 73 }
83 return 0; 74 return 0;
84} 75}
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 299edc6add5a..1c98c907bc63 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -595,7 +595,6 @@ static int ip_vs_app_open(struct inode *inode, struct file *file)
595} 595}
596 596
597static const struct file_operations ip_vs_app_fops = { 597static const struct file_operations ip_vs_app_fops = {
598 .owner = THIS_MODULE,
599 .open = ip_vs_app_open, 598 .open = ip_vs_app_open,
600 .read = seq_read, 599 .read = seq_read,
601 .llseek = seq_lseek, 600 .llseek = seq_lseek,
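
Every proc seq-file interface in the IPVS and conntrack diffs below loses its ".owner = THIS_MODULE" line; for these proc entries the field is treated as redundant boilerplate, since the proc layer itself keeps the entry alive while a file is open rather than relying on the file_operations owner. The resulting minimal shape (hypothetical single_open()-based example):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "demo\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

/* Note: no .owner member anymore. */
static const struct file_operations demo_fops = {
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
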
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3e053cb30070..370abbf6f421 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -322,7 +322,7 @@ ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
322{ 322{
323 __be16 _ports[2], *pptr; 323 __be16 _ports[2], *pptr;
324 324
325 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); 325 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
326 if (pptr == NULL) 326 if (pptr == NULL)
327 return 1; 327 return 1;
328 328
@@ -1143,7 +1143,6 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file)
1143} 1143}
1144 1144
1145static const struct file_operations ip_vs_conn_fops = { 1145static const struct file_operations ip_vs_conn_fops = {
1146 .owner = THIS_MODULE,
1147 .open = ip_vs_conn_open, 1146 .open = ip_vs_conn_open,
1148 .read = seq_read, 1147 .read = seq_read,
1149 .llseek = seq_lseek, 1148 .llseek = seq_lseek,
@@ -1221,7 +1220,6 @@ static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
1221} 1220}
1222 1221
1223static const struct file_operations ip_vs_conn_sync_fops = { 1222static const struct file_operations ip_vs_conn_sync_fops = {
1224 .owner = THIS_MODULE,
1225 .open = ip_vs_conn_sync_open, 1223 .open = ip_vs_conn_sync_open,
1226 .read = seq_read, 1224 .read = seq_read,
1227 .llseek = seq_lseek, 1225 .llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5cb7cac9177d..5f6f73cf2174 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -433,7 +433,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
433 /* 433 /*
434 * IPv6 frags, only the first hit here. 434 * IPv6 frags, only the first hit here.
435 */ 435 */
436 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); 436 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
437 if (pptr == NULL) 437 if (pptr == NULL)
438 return NULL; 438 return NULL;
439 439
@@ -566,7 +566,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
566 struct netns_ipvs *ipvs = svc->ipvs; 566 struct netns_ipvs *ipvs = svc->ipvs;
567 struct net *net = ipvs->net; 567 struct net *net = ipvs->net;
568 568
569 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); 569 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
570 if (!pptr) 570 if (!pptr)
571 return NF_DROP; 571 return NF_DROP;
572 dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; 572 dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
@@ -982,7 +982,7 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
982 unsigned int offset; 982 unsigned int offset;
983 983
984 *related = 1; 984 *related = 1;
985 ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); 985 ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph);
986 if (ic == NULL) 986 if (ic == NULL)
987 return NF_DROP; 987 return NF_DROP;
988 988
@@ -1214,7 +1214,7 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
1214 return NULL; 1214 return NULL;
1215 1215
1216 pptr = frag_safe_skb_hp(skb, iph->len, 1216 pptr = frag_safe_skb_hp(skb, iph->len,
1217 sizeof(_ports), _ports, iph); 1217 sizeof(_ports), _ports);
1218 if (!pptr) 1218 if (!pptr)
1219 return NULL; 1219 return NULL;
1220 1220
@@ -1407,7 +1407,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1407 __be16 _ports[2], *pptr; 1407 __be16 _ports[2], *pptr;
1408 1408
1409 pptr = frag_safe_skb_hp(skb, iph.len, 1409 pptr = frag_safe_skb_hp(skb, iph.len,
1410 sizeof(_ports), _ports, &iph); 1410 sizeof(_ports), _ports);
1411 if (pptr == NULL) 1411 if (pptr == NULL)
1412 return NF_ACCEPT; /* Not for me */ 1412 return NF_ACCEPT; /* Not for me */
1413 if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, 1413 if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
@@ -1741,7 +1741,7 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
1741 1741
1742 *related = 1; 1742 *related = 1;
1743 1743
1744 ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph); 1744 ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph);
1745 if (ic == NULL) 1745 if (ic == NULL)
1746 return NF_DROP; 1746 return NF_DROP;
1747 1747
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fff213eacf2a..5ebde4b15810 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2116,7 +2116,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
2116} 2116}
2117 2117
2118static const struct file_operations ip_vs_info_fops = { 2118static const struct file_operations ip_vs_info_fops = {
2119 .owner = THIS_MODULE,
2120 .open = ip_vs_info_open, 2119 .open = ip_vs_info_open,
2121 .read = seq_read, 2120 .read = seq_read,
2122 .llseek = seq_lseek, 2121 .llseek = seq_lseek,
@@ -2161,7 +2160,6 @@ static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2161} 2160}
2162 2161
2163static const struct file_operations ip_vs_stats_fops = { 2162static const struct file_operations ip_vs_stats_fops = {
2164 .owner = THIS_MODULE,
2165 .open = ip_vs_stats_seq_open, 2163 .open = ip_vs_stats_seq_open,
2166 .read = seq_read, 2164 .read = seq_read,
2167 .llseek = seq_lseek, 2165 .llseek = seq_lseek,
@@ -2230,7 +2228,6 @@ static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2230} 2228}
2231 2229
2232static const struct file_operations ip_vs_stats_percpu_fops = { 2230static const struct file_operations ip_vs_stats_percpu_fops = {
2233 .owner = THIS_MODULE,
2234 .open = ip_vs_stats_percpu_seq_open, 2231 .open = ip_vs_stats_percpu_seq_open,
2235 .read = seq_read, 2232 .read = seq_read,
2236 .llseek = seq_lseek, 2233 .llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 121a321b91be..bcd9b7bde4ee 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -315,6 +315,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
315 switch (skb->ip_summed) { 315 switch (skb->ip_summed) {
316 case CHECKSUM_NONE: 316 case CHECKSUM_NONE:
317 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); 317 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
318 /* fall through */
318 case CHECKSUM_COMPLETE: 319 case CHECKSUM_COMPLETE:
319#ifdef CONFIG_IP_VS_IPV6 320#ifdef CONFIG_IP_VS_IPV6
320 if (af == AF_INET6) { 321 if (af == AF_INET6) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 30e11cd6aa8a..c15ef7c2a1fa 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -319,6 +319,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
319 case CHECKSUM_NONE: 319 case CHECKSUM_NONE:
320 skb->csum = skb_checksum(skb, udphoff, 320 skb->csum = skb_checksum(skb, udphoff,
321 skb->len - udphoff, 0); 321 skb->len - udphoff, 0);
322 /* fall through */
322 case CHECKSUM_COMPLETE: 323 case CHECKSUM_COMPLETE:
323#ifdef CONFIG_IP_VS_IPV6 324#ifdef CONFIG_IP_VS_IPV6
324 if (af == AF_INET6) { 325 if (af == AF_INET6) {
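
The checksum switches in both protocol handlers gain explicit fall-through annotations, so tooling in the -Wimplicit-fallthrough mold can tell the deliberate CHECKSUM_NONE -> CHECKSUM_COMPLETE cascade from a forgotten break. A sketch of the annotated shape:

#include <linux/skbuff.h>

static void demo_csum_check(struct sk_buff *skb, unsigned int off)
{
	switch (skb->ip_summed) {
	case CHECKSUM_NONE:
		/* Compute the full checksum, then deliberately
		 * continue into the verification path below.
		 */
		skb->csum = skb_checksum(skb, off, skb->len - off, 0);
		/* fall through */
	case CHECKSUM_COMPLETE:
		/* a complete checksum is now available; verify it */
		break;
	}
}
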
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9ee71cb276d7..fbaf3bd05b2e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1636,17 +1636,14 @@ static int
1636ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) 1636ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1637{ 1637{
1638 struct msghdr msg = {NULL,}; 1638 struct msghdr msg = {NULL,};
1639 struct kvec iov; 1639 struct kvec iov = {buffer, buflen};
1640 int len; 1640 int len;
1641 1641
1642 EnterFunction(7); 1642 EnterFunction(7);
1643 1643
1644 /* Receive a packet */ 1644 /* Receive a packet */
1645 iov.iov_base = buffer; 1645 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
1646 iov.iov_len = (size_t)buflen; 1646 len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
1647
1648 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1649
1650 if (len < 0) 1647 if (len < 0)
1651 return len; 1648 return len;
1652 1649
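
ip_vs_receive() drops kernel_recvmsg() in favor of initializing the kvec at declaration, attaching it to the msghdr's iterator, and calling sock_recvmsg() directly. The same conversion on a hypothetical helper (direction flags as used in this kernel generation, READ | ITER_KVEC):

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

static int demo_receive(struct socket *sock, void *buf, size_t buflen)
{
	struct msghdr msg = { NULL, };
	struct kvec iov = { buf, buflen };	/* base and length up front */

	/* Point the message's iterator at the kernel buffer ... */
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
	/* ... and receive through the generic socket path. */
	return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
}
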
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
new file mode 100644
index 000000000000..6d65389e308f
--- /dev/null
+++ b/net/netfilter/nf_conncount.c
@@ -0,0 +1,373 @@
1/*
2 * count the number of connections matching an arbitrary key.
3 *
4 * (C) 2017 Red Hat GmbH
5 * Author: Florian Westphal <fw@strlen.de>
6 *
7 * split from xt_connlimit.c:
8 * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
9 * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
10 * only ignore TIME_WAIT or gone connections
11 * (C) CC Computer Consultants GmbH, 2007
12 */
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/in.h>
15#include <linux/in6.h>
16#include <linux/ip.h>
17#include <linux/ipv6.h>
18#include <linux/jhash.h>
19#include <linux/slab.h>
20#include <linux/list.h>
21#include <linux/rbtree.h>
22#include <linux/module.h>
23#include <linux/random.h>
24#include <linux/skbuff.h>
25#include <linux/spinlock.h>
26#include <linux/netfilter/nf_conntrack_tcp.h>
27#include <linux/netfilter/x_tables.h>
28#include <net/netfilter/nf_conntrack.h>
29#include <net/netfilter/nf_conntrack_count.h>
30#include <net/netfilter/nf_conntrack_core.h>
31#include <net/netfilter/nf_conntrack_tuple.h>
32#include <net/netfilter/nf_conntrack_zones.h>
33
34#define CONNCOUNT_SLOTS 256U
35
36#ifdef CONFIG_LOCKDEP
37#define CONNCOUNT_LOCK_SLOTS 8U
38#else
39#define CONNCOUNT_LOCK_SLOTS 256U
40#endif
41
42#define CONNCOUNT_GC_MAX_NODES 8
43#define MAX_KEYLEN 5
44
45/* we will save the tuples of all connections we care about */
46struct nf_conncount_tuple {
47 struct hlist_node node;
48 struct nf_conntrack_tuple tuple;
49};
50
51struct nf_conncount_rb {
52 struct rb_node node;
53 struct hlist_head hhead; /* connections/hosts in same subnet */
54 u32 key[MAX_KEYLEN];
55};
56
57static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
58
59struct nf_conncount_data {
60 unsigned int keylen;
61 struct rb_root root[CONNCOUNT_SLOTS];
62};
63
64static u_int32_t conncount_rnd __read_mostly;
65static struct kmem_cache *conncount_rb_cachep __read_mostly;
66static struct kmem_cache *conncount_conn_cachep __read_mostly;
67
68static inline bool already_closed(const struct nf_conn *conn)
69{
70 if (nf_ct_protonum(conn) == IPPROTO_TCP)
71 return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
72 conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
73 else
74 return false;
75}
76
77static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
78{
79 return memcmp(a, b, klen * sizeof(u32));
80}
81
82static bool add_hlist(struct hlist_head *head,
83 const struct nf_conntrack_tuple *tuple)
84{
85 struct nf_conncount_tuple *conn;
86
87 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
88 if (conn == NULL)
89 return false;
90 conn->tuple = *tuple;
91 hlist_add_head(&conn->node, head);
92 return true;
93}
94
95static unsigned int check_hlist(struct net *net,
96 struct hlist_head *head,
97 const struct nf_conntrack_tuple *tuple,
98 const struct nf_conntrack_zone *zone,
99 bool *addit)
100{
101 const struct nf_conntrack_tuple_hash *found;
102 struct nf_conncount_tuple *conn;
103 struct hlist_node *n;
104 struct nf_conn *found_ct;
105 unsigned int length = 0;
106
107 *addit = true;
108
109 /* check the saved connections */
110 hlist_for_each_entry_safe(conn, n, head, node) {
111 found = nf_conntrack_find_get(net, zone, &conn->tuple);
112 if (found == NULL) {
113 hlist_del(&conn->node);
114 kmem_cache_free(conncount_conn_cachep, conn);
115 continue;
116 }
117
118 found_ct = nf_ct_tuplehash_to_ctrack(found);
119
120 if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
121 /*
122 * Just to be sure we have it only once in the list.
123 * We should not see tuples twice unless someone hooks
124 * this into a table without "-p tcp --syn".
125 */
126 *addit = false;
127 } else if (already_closed(found_ct)) {
128 /*
129 * we do not care about connections which are
130 * closed already -> ditch it
131 */
132 nf_ct_put(found_ct);
133 hlist_del(&conn->node);
134 kmem_cache_free(conncount_conn_cachep, conn);
135 continue;
136 }
137
138 nf_ct_put(found_ct);
139 length++;
140 }
141
142 return length;
143}
144
145static void tree_nodes_free(struct rb_root *root,
146 struct nf_conncount_rb *gc_nodes[],
147 unsigned int gc_count)
148{
149 struct nf_conncount_rb *rbconn;
150
151 while (gc_count) {
152 rbconn = gc_nodes[--gc_count];
153 rb_erase(&rbconn->node, root);
154 kmem_cache_free(conncount_rb_cachep, rbconn);
155 }
156}
157
158static unsigned int
159count_tree(struct net *net, struct rb_root *root,
160 const u32 *key, u8 keylen,
161 u8 family,
162 const struct nf_conntrack_tuple *tuple,
163 const struct nf_conntrack_zone *zone)
164{
165 struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
166 struct rb_node **rbnode, *parent;
167 struct nf_conncount_rb *rbconn;
168 struct nf_conncount_tuple *conn;
169 unsigned int gc_count;
170 bool no_gc = false;
171
172 restart:
173 gc_count = 0;
174 parent = NULL;
175 rbnode = &(root->rb_node);
176 while (*rbnode) {
177 int diff;
178 bool addit;
179
180 rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
181
182 parent = *rbnode;
183 diff = key_diff(key, rbconn->key, keylen);
184 if (diff < 0) {
185 rbnode = &((*rbnode)->rb_left);
186 } else if (diff > 0) {
187 rbnode = &((*rbnode)->rb_right);
188 } else {
189 /* same source network -> be counted! */
190 unsigned int count;
191 count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
192
193 tree_nodes_free(root, gc_nodes, gc_count);
194 if (!addit)
195 return count;
196
197 if (!add_hlist(&rbconn->hhead, tuple))
198 return 0; /* hotdrop */
199
200 return count + 1;
201 }
202
203 if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
204 continue;
205
206 /* only used for GC on hhead, retval and 'addit' ignored */
207 check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
208 if (hlist_empty(&rbconn->hhead))
209 gc_nodes[gc_count++] = rbconn;
210 }
211
212 if (gc_count) {
213 no_gc = true;
214 tree_nodes_free(root, gc_nodes, gc_count);
 215 /* tree_nodes_free() before the new allocation permits the
 216 * allocator to reuse the newly freed object.
 217 *
 218 * This is a rare event; in most cases we will find an
 219 * existing node to reuse (or gc_count is 0).
220 */
221 goto restart;
222 }
223
224 /* no match, need to insert new node */
225 rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
226 if (rbconn == NULL)
227 return 0;
228
229 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
230 if (conn == NULL) {
231 kmem_cache_free(conncount_rb_cachep, rbconn);
232 return 0;
233 }
234
235 conn->tuple = *tuple;
236 memcpy(rbconn->key, key, sizeof(u32) * keylen);
237
238 INIT_HLIST_HEAD(&rbconn->hhead);
239 hlist_add_head(&conn->node, &rbconn->hhead);
240
241 rb_link_node(&rbconn->node, parent, rbnode);
242 rb_insert_color(&rbconn->node, root);
243 return 1;
244}
245
246unsigned int nf_conncount_count(struct net *net,
247 struct nf_conncount_data *data,
248 const u32 *key,
249 unsigned int family,
250 const struct nf_conntrack_tuple *tuple,
251 const struct nf_conntrack_zone *zone)
252{
253 struct rb_root *root;
254 int count;
255 u32 hash;
256
257 hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
258 root = &data->root[hash];
259
260 spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
261
262 count = count_tree(net, root, key, data->keylen, family, tuple, zone);
263
264 spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
265
266 return count;
267}
268EXPORT_SYMBOL_GPL(nf_conncount_count);
269
270struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
271 unsigned int keylen)
272{
273 struct nf_conncount_data *data;
274 int ret, i;
275
276 if (keylen % sizeof(u32) ||
277 keylen / sizeof(u32) > MAX_KEYLEN ||
278 keylen == 0)
279 return ERR_PTR(-EINVAL);
280
281 net_get_random_once(&conncount_rnd, sizeof(conncount_rnd));
282
283 data = kmalloc(sizeof(*data), GFP_KERNEL);
284 if (!data)
285 return ERR_PTR(-ENOMEM);
286
287 ret = nf_ct_netns_get(net, family);
288 if (ret < 0) {
289 kfree(data);
290 return ERR_PTR(ret);
291 }
292
293 for (i = 0; i < ARRAY_SIZE(data->root); ++i)
294 data->root[i] = RB_ROOT;
295
296 data->keylen = keylen / sizeof(u32);
297
298 return data;
299}
300EXPORT_SYMBOL_GPL(nf_conncount_init);
301
302static void destroy_tree(struct rb_root *r)
303{
304 struct nf_conncount_tuple *conn;
305 struct nf_conncount_rb *rbconn;
306 struct hlist_node *n;
307 struct rb_node *node;
308
309 while ((node = rb_first(r)) != NULL) {
310 rbconn = rb_entry(node, struct nf_conncount_rb, node);
311
312 rb_erase(node, r);
313
314 hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
315 kmem_cache_free(conncount_conn_cachep, conn);
316
317 kmem_cache_free(conncount_rb_cachep, rbconn);
318 }
319}
320
321void nf_conncount_destroy(struct net *net, unsigned int family,
322 struct nf_conncount_data *data)
323{
324 unsigned int i;
325
326 nf_ct_netns_put(net, family);
327
328 for (i = 0; i < ARRAY_SIZE(data->root); ++i)
329 destroy_tree(&data->root[i]);
330
331 kfree(data);
332}
333EXPORT_SYMBOL_GPL(nf_conncount_destroy);
334
335static int __init nf_conncount_modinit(void)
336{
337 int i;
338
339 BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
340 BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
341
342 for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
343 spin_lock_init(&nf_conncount_locks[i]);
344
345 conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
346 sizeof(struct nf_conncount_tuple),
347 0, 0, NULL);
348 if (!conncount_conn_cachep)
349 return -ENOMEM;
350
351 conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
352 sizeof(struct nf_conncount_rb),
353 0, 0, NULL);
354 if (!conncount_rb_cachep) {
355 kmem_cache_destroy(conncount_conn_cachep);
356 return -ENOMEM;
357 }
358
359 return 0;
360}
361
362static void __exit nf_conncount_modexit(void)
363{
364 kmem_cache_destroy(conncount_conn_cachep);
365 kmem_cache_destroy(conncount_rb_cachep);
366}
367
368module_init(nf_conncount_modinit);
369module_exit(nf_conncount_modexit);
370MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
371MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
372MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
373MODULE_LICENSE("GPL");
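
The new nf_conncount module hashes the caller's key into one of CONNCOUNT_SLOTS rb-trees (each guarded by one of CONNCOUNT_LOCK_SLOTS spinlocks), keeps a per-node hlist of saved tuples, and garbage-collects closed or vanished connections opportunistically during lookups. How a caller such as xt_connlimit is expected to drive it, per the API above (demo_* names are hypothetical):

#include <linux/err.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_count.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

struct demo_ctx {
	struct nf_conncount_data *data;
};

static int demo_setup(struct net *net, struct demo_ctx *ctx)
{
	/* keylen is in bytes: a u32 multiple, nonzero, at most
	 * MAX_KEYLEN * sizeof(u32). One u32 here = a /32 source key.
	 */
	ctx->data = nf_conncount_init(net, NFPROTO_IPV4, sizeof(u32));
	return PTR_ERR_OR_ZERO(ctx->data);
}

static bool demo_over_limit(struct net *net, struct demo_ctx *ctx, u32 key,
			    const struct nf_conntrack_tuple *tuple,
			    const struct nf_conntrack_zone *zone,
			    unsigned int limit)
{
	/* Counts live connections sharing 'key' and records the
	 * current tuple; a return of 0 doubles as count_tree()'s
	 * allocation-failure ("hotdrop") signal.
	 */
	return nf_conncount_count(net, ctx->data, &key,
				  NFPROTO_IPV4, tuple, zone) > limit;
}

static void demo_teardown(struct net *net, struct demo_ctx *ctx)
{
	nf_conncount_destroy(net, NFPROTO_IPV4, ctx->data);
}
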
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85f643c1e227..705198de671d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -58,8 +58,6 @@
58 58
59#include "nf_internals.h" 59#include "nf_internals.h"
60 60
61#define NF_CONNTRACK_VERSION "0.5.0"
62
63int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, 61int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
64 enum nf_nat_manip_type manip, 62 enum nf_nat_manip_type manip,
65 const struct nlattr *attr) __read_mostly; 63 const struct nlattr *attr) __read_mostly;
@@ -901,6 +899,9 @@ static unsigned int early_drop_list(struct net *net,
901 hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { 899 hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
902 tmp = nf_ct_tuplehash_to_ctrack(h); 900 tmp = nf_ct_tuplehash_to_ctrack(h);
903 901
902 if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
903 continue;
904
904 if (nf_ct_is_expired(tmp)) { 905 if (nf_ct_is_expired(tmp)) {
905 nf_ct_gc_expired(tmp); 906 nf_ct_gc_expired(tmp);
906 continue; 907 continue;
@@ -975,6 +976,18 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
975 return false; 976 return false;
976} 977}
977 978
979#define DAY (86400 * HZ)
980
 981/* Set an arbitrary timeout large enough not to ever expire; this saves
982 * us a check for the IPS_OFFLOAD_BIT from the packet path via
983 * nf_ct_is_expired().
984 */
985static void nf_ct_offload_timeout(struct nf_conn *ct)
986{
987 if (nf_ct_expires(ct) < DAY / 2)
988 ct->timeout = nfct_time_stamp + DAY;
989}
990
978static void gc_worker(struct work_struct *work) 991static void gc_worker(struct work_struct *work)
979{ 992{
980 unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); 993 unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1011,6 +1024,11 @@ static void gc_worker(struct work_struct *work)
1011 tmp = nf_ct_tuplehash_to_ctrack(h); 1024 tmp = nf_ct_tuplehash_to_ctrack(h);
1012 1025
1013 scanned++; 1026 scanned++;
1027 if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
1028 nf_ct_offload_timeout(tmp);
1029 continue;
1030 }
1031
1014 if (nf_ct_is_expired(tmp)) { 1032 if (nf_ct_is_expired(tmp)) {
1015 nf_ct_gc_expired(tmp); 1033 nf_ct_gc_expired(tmp);
1016 expired_count++; 1034 expired_count++;
@@ -1044,7 +1062,7 @@ static void gc_worker(struct work_struct *work)
1044 * we will just continue with next hash slot. 1062 * we will just continue with next hash slot.
1045 */ 1063 */
1046 rcu_read_unlock(); 1064 rcu_read_unlock();
1047 cond_resched_rcu_qs(); 1065 cond_resched();
1048 } while (++buckets < goal); 1066 } while (++buckets < goal);
1049 1067
1050 if (gc_work->exiting) 1068 if (gc_work->exiting)
@@ -2048,10 +2066,6 @@ int nf_conntrack_init_start(void)
2048 if (!nf_conntrack_cachep) 2066 if (!nf_conntrack_cachep)
2049 goto err_cachep; 2067 goto err_cachep;
2050 2068
2051 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
2052 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
2053 nf_conntrack_max);
2054
2055 ret = nf_conntrack_expect_init(); 2069 ret = nf_conntrack_expect_init();
2056 if (ret < 0) 2070 if (ret < 0)
2057 goto err_expect; 2071 goto err_expect;
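
Conntrack now leaves offloaded flows alone: early_drop skips entries with IPS_OFFLOAD_BIT set, and the gc worker, instead of expiring them, tops their timeout back up to about a day whenever it drops below half that, so nf_ct_is_expired() on the packet path stays a single comparison with no offload test. A standalone mirror of that refresh logic, under the same one-day constant as above:

#include <net/netfilter/nf_conntrack.h>

#define DEMO_DAY	(86400 * HZ)

static void demo_refresh_offloaded(struct nf_conn *ct)
{
	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
		return;
	/* Keep the timeout comfortably in the future; refreshing
	 * only below DAY/2 bounds how often the gc worker writes
	 * to the entry.
	 */
	if (nf_ct_expires(ct) < DEMO_DAY / 2)
		ct->timeout = nfct_time_stamp + DEMO_DAY;
}
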
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index d6748a8a79c5..8ef21d9f9a00 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -649,7 +649,6 @@ static int exp_open(struct inode *inode, struct file *file)
649} 649}
650 650
651static const struct file_operations exp_file_ops = { 651static const struct file_operations exp_file_ops = {
652 .owner = THIS_MODULE,
653 .open = exp_open, 652 .open = exp_open,
654 .read = seq_read, 653 .read = seq_read,
655 .llseek = seq_lseek, 654 .llseek = seq_lseek,
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index dc6347342e34..1601275efe2d 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,4 +1,4 @@
1/**************************************************************************** 1/*
2 * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323 2 * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
3 * conntrack/NAT module. 3 * conntrack/NAT module.
4 * 4 *
@@ -8,7 +8,7 @@
8 * 8 *
9 * See ip_conntrack_helper_h323_asn1.h for details. 9 * See ip_conntrack_helper_h323_asn1.h for details.
10 * 10 *
11 ****************************************************************************/ 11 */
12 12
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14#include <linux/kernel.h> 14#include <linux/kernel.h>
@@ -140,14 +140,15 @@ static const decoder_t Decoders[] = {
140 decode_choice, 140 decode_choice,
141}; 141};
142 142
143/**************************************************************************** 143/*
144 * H.323 Types 144 * H.323 Types
145 ****************************************************************************/ 145 */
146#include "nf_conntrack_h323_types.c" 146#include "nf_conntrack_h323_types.c"
147 147
148/**************************************************************************** 148/*
149 * Functions 149 * Functions
150 ****************************************************************************/ 150 */
151
151/* Assume bs is aligned && v < 16384 */ 152/* Assume bs is aligned && v < 16384 */
152static unsigned int get_len(struct bitstr *bs) 153static unsigned int get_len(struct bitstr *bs)
153{ 154{
@@ -177,7 +178,6 @@ static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
177 return 0; 178 return 0;
178} 179}
179 180
180/****************************************************************************/
181static unsigned int get_bit(struct bitstr *bs) 181static unsigned int get_bit(struct bitstr *bs)
182{ 182{
183 unsigned int b = (*bs->cur) & (0x80 >> bs->bit); 183 unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
@@ -187,7 +187,6 @@ static unsigned int get_bit(struct bitstr *bs)
187 return b; 187 return b;
188} 188}
189 189
190/****************************************************************************/
191/* Assume b <= 8 */ 190/* Assume b <= 8 */
192static unsigned int get_bits(struct bitstr *bs, unsigned int b) 191static unsigned int get_bits(struct bitstr *bs, unsigned int b)
193{ 192{
@@ -213,7 +212,6 @@ static unsigned int get_bits(struct bitstr *bs, unsigned int b)
213 return v; 212 return v;
214} 213}
215 214
216/****************************************************************************/
217/* Assume b <= 32 */ 215/* Assume b <= 32 */
218static unsigned int get_bitmap(struct bitstr *bs, unsigned int b) 216static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
219{ 217{
@@ -251,9 +249,9 @@ static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
251 return v; 249 return v;
252} 250}
253 251
254/**************************************************************************** 252/*
255 * Assume bs is aligned and sizeof(unsigned int) == 4 253 * Assume bs is aligned and sizeof(unsigned int) == 4
256 ****************************************************************************/ 254 */
257static unsigned int get_uint(struct bitstr *bs, int b) 255static unsigned int get_uint(struct bitstr *bs, int b)
258{ 256{
259 unsigned int v = 0; 257 unsigned int v = 0;
@@ -262,12 +260,15 @@ static unsigned int get_uint(struct bitstr *bs, int b)
262 case 4: 260 case 4:
263 v |= *bs->cur++; 261 v |= *bs->cur++;
264 v <<= 8; 262 v <<= 8;
263 /* fall through */
265 case 3: 264 case 3:
266 v |= *bs->cur++; 265 v |= *bs->cur++;
267 v <<= 8; 266 v <<= 8;
267 /* fall through */
268 case 2: 268 case 2:
269 v |= *bs->cur++; 269 v |= *bs->cur++;
270 v <<= 8; 270 v <<= 8;
271 /* fall through */
271 case 1: 272 case 1:
272 v |= *bs->cur++; 273 v |= *bs->cur++;
273 break; 274 break;
@@ -275,7 +276,6 @@ static unsigned int get_uint(struct bitstr *bs, int b)
275 return v; 276 return v;
276} 277}
277 278
278/****************************************************************************/
279static int decode_nul(struct bitstr *bs, const struct field_t *f, 279static int decode_nul(struct bitstr *bs, const struct field_t *f,
280 char *base, int level) 280 char *base, int level)
281{ 281{
@@ -284,7 +284,6 @@ static int decode_nul(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_bool(struct bitstr *bs, const struct field_t *f,
 		       char *base, int level)
 {
@@ -296,7 +295,6 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_oid(struct bitstr *bs, const struct field_t *f,
 		      char *base, int level)
 {
@@ -316,7 +314,6 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_int(struct bitstr *bs, const struct field_t *f,
 		      char *base, int level)
 {
@@ -364,7 +361,6 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_enum(struct bitstr *bs, const struct field_t *f,
 		       char *base, int level)
 {
@@ -381,7 +377,6 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
 			 char *base, int level)
 {
@@ -418,7 +413,6 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_numstr(struct bitstr *bs, const struct field_t *f,
 			 char *base, int level)
 {
@@ -439,7 +433,6 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_octstr(struct bitstr *bs, const struct field_t *f,
 			 char *base, int level)
 {
@@ -493,7 +486,6 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
 			 char *base, int level)
 {
@@ -523,7 +515,6 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_seq(struct bitstr *bs, const struct field_t *f,
 		      char *base, int level)
 {
@@ -653,7 +644,6 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 static int decode_seqof(struct bitstr *bs, const struct field_t *f,
 			char *base, int level)
 {
@@ -750,8 +740,6 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-
-/****************************************************************************/
 static int decode_choice(struct bitstr *bs, const struct field_t *f,
 			 char *base, int level)
 {
@@ -833,7 +821,6 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
 	return H323_ERROR_NONE;
 }
 
-/****************************************************************************/
 int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
 {
 	static const struct field_t ras_message = {
@@ -849,7 +836,6 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
 	return decode_choice(&bs, &ras_message, (char *) ras, 0);
 }
 
-/****************************************************************************/
 static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
 				      size_t sz, H323_UserInformation *uuie)
 {
@@ -867,7 +853,6 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
 	return decode_seq(&bs, &h323_userinformation, (char *) uuie, 0);
 }
 
-/****************************************************************************/
 int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
 					 MultimediaSystemControlMessage *
 					 mscm)
@@ -886,7 +871,6 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
 			     (char *) mscm, 0);
 }
 
-/****************************************************************************/
 int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
 {
 	unsigned char *p = buf;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f71f0d2558fd..005589c6d0f6 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -24,6 +24,7 @@
 #include <linux/skbuff.h>
 #include <net/route.h>
 #include <net/ip6_route.h>
+#include <linux/netfilter_ipv6.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -115,7 +116,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245;
 static struct nf_conntrack_helper nf_conntrack_helper_q931[];
 static struct nf_conntrack_helper nf_conntrack_helper_ras[];
 
-/****************************************************************************/
 static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 			 struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			 unsigned char **data, int *datalen, int *dataoff)
@@ -219,7 +219,6 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 	return 0;
 }
 
-/****************************************************************************/
 static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
 			 H245_TransportAddress *taddr,
 			 union nf_inet_addr *addr, __be16 *port)
@@ -254,7 +253,6 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
 	return 1;
 }
 
-/****************************************************************************/
 static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned int protoff,
@@ -328,7 +326,6 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	return ret;
 }
 
-/****************************************************************************/
 static int expect_t120(struct sk_buff *skb,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
@@ -380,7 +377,6 @@ static int expect_t120(struct sk_buff *skb,
 	return ret;
 }
 
-/****************************************************************************/
 static int process_h245_channel(struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
@@ -410,7 +406,6 @@ static int process_h245_channel(struct sk_buff *skb,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -472,7 +467,6 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned int protoff, unsigned char **data, int dataoff,
@@ -542,7 +536,6 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned int protoff, unsigned char **data, int dataoff,
@@ -578,7 +571,6 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int h245_help(struct sk_buff *skb, unsigned int protoff,
 		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
@@ -628,7 +620,6 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
 		return NF_DROP;
 }
 
-/****************************************************************************/
 static const struct nf_conntrack_expect_policy h245_exp_policy = {
 	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
 	.timeout = 240,
@@ -643,7 +634,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
 	.expect_policy = &h245_exp_policy,
 };
 
-/****************************************************************************/
 int get_h225_addr(struct nf_conn *ct, unsigned char *data,
 		  TransportAddress *taddr,
 		  union nf_inet_addr *addr, __be16 *port)
@@ -675,7 +665,6 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
 	return 1;
 }
 
-/****************************************************************************/
 static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff, unsigned char **data, int dataoff,
@@ -726,20 +715,15 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 }
 
 /* If the calling party is on the same side of the forward-to party,
- * we don't need to track the second call */
+ * we don't need to track the second call
+ */
 static int callforward_do_filter(struct net *net,
 				 const union nf_inet_addr *src,
 				 const union nf_inet_addr *dst,
 				 u_int8_t family)
 {
-	const struct nf_afinfo *afinfo;
 	int ret = 0;
 
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	afinfo = nf_get_afinfo(family);
-	if (!afinfo)
-		return 0;
-
 	switch (family) {
 	case AF_INET: {
 		struct flowi4 fl1, fl2;
@@ -750,10 +734,10 @@ static int callforward_do_filter(struct net *net,
 
 		memset(&fl2, 0, sizeof(fl2));
 		fl2.daddr = dst->ip;
-		if (!afinfo->route(net, (struct dst_entry **)&rt1,
+		if (!nf_ip_route(net, (struct dst_entry **)&rt1,
 				   flowi4_to_flowi(&fl1), false)) {
-			if (!afinfo->route(net, (struct dst_entry **)&rt2,
+			if (!nf_ip_route(net, (struct dst_entry **)&rt2,
 					   flowi4_to_flowi(&fl2), false)) {
 				if (rt_nexthop(rt1, fl1.daddr) ==
 				    rt_nexthop(rt2, fl2.daddr) &&
 				    rt1->dst.dev == rt2->dst.dev)
@@ -766,18 +750,23 @@ static int callforward_do_filter(struct net *net,
 		}
 #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
 	case AF_INET6: {
-		struct flowi6 fl1, fl2;
+		const struct nf_ipv6_ops *v6ops;
 		struct rt6_info *rt1, *rt2;
+		struct flowi6 fl1, fl2;
+
+		v6ops = nf_get_ipv6_ops();
+		if (!v6ops)
+			return 0;
 
 		memset(&fl1, 0, sizeof(fl1));
 		fl1.daddr = src->in6;
 
 		memset(&fl2, 0, sizeof(fl2));
 		fl2.daddr = dst->in6;
-		if (!afinfo->route(net, (struct dst_entry **)&rt1,
+		if (!v6ops->route(net, (struct dst_entry **)&rt1,
 				  flowi6_to_flowi(&fl1), false)) {
-			if (!afinfo->route(net, (struct dst_entry **)&rt2,
+			if (!v6ops->route(net, (struct dst_entry **)&rt2,
 					  flowi6_to_flowi(&fl2), false)) {
 				if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
 						    rt6_nexthop(rt2, &fl2.daddr)) &&
 				    rt1->dst.dev == rt2->dst.dev)
@@ -794,7 +783,6 @@ static int callforward_do_filter(struct net *net,
 
 }
 
-/****************************************************************************/
 static int expect_callforwarding(struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
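
The callforward_do_filter() rewrite above removes the last per-family indirection for IPv4: the nf_get_afinfo()/afinfo->route() lookup is replaced by a direct nf_ip_route() call, while IPv6 keeps an ops table because IPv6 may be modular or absent, which is why nf_get_ipv6_ops() can return NULL and must be checked. A sketch of that optional-backend pattern, under hypothetical names:

	struct demo_ops {
		int (*route)(u32 daddr);
	};

	/* NULL while the optional backend is not loaded, the same way
	 * nf_get_ipv6_ops() yields NULL without IPv6 support. */
	static const struct demo_ops *demo_ops;

	static int demo_route(u32 daddr)
	{
		const struct demo_ops *ops = READ_ONCE(demo_ops);

		if (!ops)
			return 0;	/* backend absent: report "no route" */
		return ops->route(daddr);
	}
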
@@ -815,7 +803,8 @@ static int expect_callforwarding(struct sk_buff *skb,
 		return 0;
 
 	/* If the calling party is on the same side of the forward-to party,
-	 * we don't need to track the second call */
+	 * we don't need to track the second call
+	 */
 	if (callforward_filter &&
 	    callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3,
 				  nf_ct_l3num(ct))) {
@@ -854,7 +843,6 @@ static int expect_callforwarding(struct sk_buff *skb,
 	return ret;
 }
 
-/****************************************************************************/
 static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int protoff,
@@ -925,7 +913,6 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_callproceeding(struct sk_buff *skb,
 				  struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
@@ -958,7 +945,6 @@ static int process_callproceeding(struct sk_buff *skb,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned int protoff,
@@ -990,7 +976,6 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned int protoff,
@@ -1022,7 +1007,6 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned int protoff,
@@ -1063,7 +1047,6 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned int protoff,
@@ -1095,7 +1078,6 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned int protoff, unsigned char **data, int dataoff,
@@ -1154,7 +1136,6 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int q931_help(struct sk_buff *skb, unsigned int protoff,
 		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
@@ -1203,7 +1184,6 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
 		return NF_DROP;
 }
 
-/****************************************************************************/
 static const struct nf_conntrack_expect_policy q931_exp_policy = {
 	/* T.120 and H.245 */
 	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4,
@@ -1231,7 +1211,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
 	},
 };
 
-/****************************************************************************/
 static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
 				   int *datalen)
 {
@@ -1249,7 +1228,6 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
 	return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
 }
 
-/****************************************************************************/
 static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 					       union nf_inet_addr *addr,
 					       __be16 port)
@@ -1270,7 +1248,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	return NULL;
 }
 
-/****************************************************************************/
 static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff, unsigned char **data,
@@ -1328,7 +1305,6 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 	return ret;
 }
 
-/****************************************************************************/
 static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1346,7 +1322,6 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1391,7 +1366,6 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 	return ret;
 }
 
-/****************************************************************************/
 static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1428,7 +1402,6 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1480,7 +1453,6 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1514,7 +1486,6 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1559,7 +1530,6 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1608,7 +1578,6 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 	return ret;
 }
 
-/****************************************************************************/
 static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1626,7 +1595,6 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1666,7 +1634,6 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 	return ret;
 }
 
-/****************************************************************************/
 static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1700,7 +1667,6 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned int protoff,
@@ -1745,7 +1711,6 @@ static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
-/****************************************************************************/
 static int ras_help(struct sk_buff *skb, unsigned int protoff,
 		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
@@ -1788,7 +1753,6 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
 	return NF_DROP;
 }
 
-/****************************************************************************/
 static const struct nf_conntrack_expect_policy ras_exp_policy = {
 	.max_expected = 32,
 	.timeout = 240,
@@ -1849,7 +1813,6 @@ static void __exit h323_helper_exit(void)
 	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
 }
 
-/****************************************************************************/
 static void __exit nf_conntrack_h323_fini(void)
 {
 	h323_helper_exit();
@@ -1857,7 +1820,6 @@ static void __exit nf_conntrack_h323_fini(void)
 	pr_debug("nf_ct_h323: fini\n");
 }
 
-/****************************************************************************/
 static int __init nf_conntrack_h323_init(void)
 {
 	int ret;
@@ -1877,7 +1839,6 @@ err1:
 	return ret;
 }
 
-/****************************************************************************/
 module_init(nf_conntrack_h323_init);
 module_exit(nf_conntrack_h323_fini);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 382d49792f42..dd177ebee9aa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -57,8 +57,6 @@
 
 MODULE_LICENSE("GPL");
 
-static char __initdata version[] = "0.93";
-
 static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
 				       const struct nf_conntrack_tuple *tuple,
 				       const struct nf_conntrack_l4proto *l4proto)
@@ -544,7 +542,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 	len *= 3u; /* ORIG, REPLY, MASTER */
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	len += l4proto->nla_size;
+	len += l4proto->nlattr_size;
 	if (l4proto->nlattr_tuple_size) {
 		len4 = l4proto->nlattr_tuple_size();
 		len4 *= 3u; /* ORIG, REPLY, MASTER */
@@ -1110,6 +1108,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 		    .len = NF_CT_LABELS_MAX_SIZE },
 };
 
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+{
+	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+		return 0;
+
+	return ctnetlink_filter_match(ct, data);
+}
+
 static int ctnetlink_flush_conntrack(struct net *net,
 				     const struct nlattr * const cda[],
 				     u32 portid, int report)
@@ -1122,7 +1128,7 @@ static int ctnetlink_flush_conntrack(struct net *net,
 		return PTR_ERR(filter);
 	}
 
-	nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+	nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
 				  portid, report);
 	kfree(filter);
 
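
ctnetlink_flush_iterate() exists because nf_ct_iterate_cleanup_net() removes every entry for which its callback returns nonzero; wrapping the user-supplied filter this way makes a netlink flush skip entries the flowtable has marked IPS_OFFLOAD. As a sketch of the callback contract only (hypothetical helper, not part of this patch), a filter that would flush everything except offloaded entries:

	static int flush_all_but_offloaded(struct nf_conn *ct, void *data)
	{
		/* nonzero return -> this entry is deleted */
		return !test_bit(IPS_OFFLOAD_BIT, &ct->status);
	}
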
@@ -1168,6 +1174,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
+	if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+		nf_ct_put(ct);
+		return -EBUSY;
+	}
+
 	if (cda[CTA_ID]) {
 		u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
 		if (id != (u32)(unsigned long)ct) {
@@ -3412,7 +3423,6 @@ static int __init ctnetlink_init(void)
 {
 	int ret;
 
-	pr_info("ctnetlink v%s: registering with nfnetlink.\n", version);
 	ret = nfnetlink_subsys_register(&ctnl_subsys);
 	if (ret < 0) {
 		pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
@@ -3446,8 +3456,6 @@ err_out:
 
 static void __exit ctnetlink_exit(void)
 {
-	pr_info("ctnetlink: unregistering from nfnetlink.\n");
-
 	unregister_pernet_subsys(&ctnetlink_net_ops);
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c8e9c9503a08..afdeca53e88b 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -385,14 +385,14 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
 
 /* FIXME: Allow NULL functions and sub in pointers to generic for
    them. --RR */
-int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 
 	if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
 		return -EBUSY;
 
-	if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
+	if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
 	    (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
 		return -EINVAL;
 
@@ -428,10 +428,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
 		goto out_unlock;
 	}
 
-	l4proto->nla_size = 0;
-	if (l4proto->nlattr_size)
-		l4proto->nla_size += l4proto->nlattr_size();
-
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   l4proto);
 out_unlock:
@@ -502,7 +498,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
 
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
+int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
 			   unsigned int num_proto)
 {
 	int ret = -EINVAL, ver;
@@ -524,7 +520,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
 int nf_ct_l4proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l4proto *const l4proto[],
+				  const struct nf_conntrack_l4proto *const l4proto[],
 				  unsigned int num_proto)
 {
 	int ret = -EINVAL;
@@ -545,7 +541,7 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
 
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
+void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
 			      unsigned int num_proto)
 {
 	mutex_lock(&nf_ct_proto_mutex);
@@ -555,12 +551,12 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
 
 	synchronize_net();
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l4proto, l4proto);
+	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
 
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l4proto *const l4proto[],
+				     const struct nf_conntrack_l4proto *const l4proto[],
 				     unsigned int num_proto)
 {
 	while (num_proto-- != 0)
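
With the tracker definitions const-qualified throughout this series, the one rough edge is nf_ct_iterate_destroy(), whose data argument is a plain void *; the (void *)l4proto cast above drops the qualifier at the call site, and the callback is expected only to read through the pointer. The usual shape of such a read-only callback, with a hypothetical body for illustration:

	static int match_l4proto(struct nf_conn *ct, void *data)
	{
		const struct nf_conntrack_l4proto *p = data;

		/* read-only use of the const object passed via void * */
		return nf_ct_protonum(ct) == p->l4proto;
	}
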
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 2a446f4a554c..abe647d5b8c6 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -654,6 +654,12 @@ static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
 	[CTA_PROTOINFO_DCCP_PAD]	= { .type = NLA_UNSPEC },
 };
 
+#define DCCP_NLATTR_SIZE ( \
+	NLA_ALIGN(NLA_HDRLEN + 1) + \
+	NLA_ALIGN(NLA_HDRLEN + 1) + \
+	NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
+	NLA_ALIGN(NLA_HDRLEN + 0))
+
 static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 {
 	struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
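
DCCP_NLATTR_SIZE trades the removed runtime helper (see the hunk below) for a compile-time constant: a netlink attribute with a given payload occupies NLA_ALIGN(NLA_HDRLEN + payload) bytes, and the four terms correspond to the two one-byte state/role attributes, the 64-bit handshake sequence, and the zero-length pad. With NLA_HDRLEN being 4 and 4-byte alignment, that works out to 8 + 8 + 12 + 4 = 32 bytes, which could be sanity-checked like this (illustrative only, not part of the patch):

	static void dccp_nlattr_size_check(void)
	{
		/* 8 (u8) + 8 (u8) + 12 (u64) + 4 (empty pad) = 32 */
		BUILD_BUG_ON(DCCP_NLATTR_SIZE != 32);
	}
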
@@ -691,13 +697,6 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 	spin_unlock_bh(&ct->lock);
 	return 0;
 }
-
-static int dccp_nlattr_size(void)
-{
-	return nla_total_size(0)	/* CTA_PROTOINFO_DCCP */
-		+ nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1);
-}
-
 #endif
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -862,7 +861,7 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
 	return &net->ct.nf_ct_proto.dccp.pn;
 }
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
 	.l3proto = AF_INET,
 	.l4proto = IPPROTO_DCCP,
 	.pkt_to_tuple = dccp_pkt_to_tuple,
@@ -876,8 +875,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.print_conntrack = dccp_print_conntrack,
 #endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.nlattr_size = DCCP_NLATTR_SIZE,
 	.to_nlattr = dccp_to_nlattr,
-	.nlattr_size = dccp_nlattr_size,
 	.from_nlattr = nlattr_to_dccp,
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -898,7 +897,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
 	.l3proto = AF_INET6,
 	.l4proto = IPPROTO_DCCP,
 	.pkt_to_tuple = dccp_pkt_to_tuple,
@@ -912,8 +911,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.print_conntrack = dccp_print_conntrack,
 #endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.nlattr_size = DCCP_NLATTR_SIZE,
 	.to_nlattr = dccp_to_nlattr,
-	.nlattr_size = dccp_nlattr_size,
 	.from_nlattr = nlattr_to_dccp,
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 1f86ddf6649a..6c6896d21cd7 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 
-static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static const unsigned int nf_ct_generic_timeout = 600*HZ;
 
 static bool nf_generic_should_process(u8 proto)
 {
@@ -163,7 +163,7 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
 	return &net->ct.nf_ct_proto.generic.pn;
 }
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 {
 	.l3proto = PF_UNSPEC,
 	.l4proto = 255,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a2503005d80b..d049ea5a3770 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -48,7 +48,7 @@ enum grep_conntrack {
 	GRE_CT_MAX
 };
 
-static unsigned int gre_timeouts[GRE_CT_MAX] = {
+static const unsigned int gre_timeouts[GRE_CT_MAX] = {
 	[GRE_CT_UNREPLIED]	= 30*HZ,
 	[GRE_CT_REPLIED]	= 180*HZ,
 };
@@ -352,7 +352,7 @@ static int gre_init_net(struct net *net, u_int16_t proto)
 }
 
 /* protocol helper struct */
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
+static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 	.l3proto = AF_INET,
 	.l4proto = IPPROTO_GRE,
 	.pkt_to_tuple = gre_pkt_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 80faf04ddf15..fb9a35d16069 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -52,7 +52,7 @@ static const char *const sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
+static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
 	[SCTP_CONNTRACK_CLOSED]		= 10 SECS,
 	[SCTP_CONNTRACK_COOKIE_WAIT]	= 3 SECS,
 	[SCTP_CONNTRACK_COOKIE_ECHOED]	= 3 SECS,
@@ -578,6 +578,11 @@ static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
 	[CTA_PROTOINFO_SCTP_VTAG_REPLY]	= { .type = NLA_U32 },
 };
 
+#define SCTP_NLATTR_SIZE ( \
+	NLA_ALIGN(NLA_HDRLEN + 1) + \
+	NLA_ALIGN(NLA_HDRLEN + 4) + \
+	NLA_ALIGN(NLA_HDRLEN + 4))
+
 static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
 {
 	struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
@@ -608,12 +613,6 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
 
 	return 0;
 }
-
-static int sctp_nlattr_size(void)
-{
-	return nla_total_size(0)	/* CTA_PROTOINFO_SCTP */
-		+ nla_policy_len(sctp_nla_policy, CTA_PROTOINFO_SCTP_MAX + 1);
-}
 #endif
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -778,7 +777,7 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
 	return &net->ct.nf_ct_proto.sctp.pn;
 }
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
 	.l3proto = PF_INET,
 	.l4proto = IPPROTO_SCTP,
 	.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -793,8 +792,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.can_early_drop = sctp_can_early_drop,
 	.me = THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.nlattr_size = SCTP_NLATTR_SIZE,
 	.to_nlattr = sctp_to_nlattr,
-	.nlattr_size = sctp_nlattr_size,
 	.from_nlattr = nlattr_to_sctp,
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -815,7 +814,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
 	.l3proto = PF_INET6,
 	.l4proto = IPPROTO_SCTP,
 	.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -830,8 +829,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.can_early_drop = sctp_can_early_drop,
 	.me = THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.nlattr_size = SCTP_NLATTR_SIZE,
 	.to_nlattr = sctp_to_nlattr,
-	.nlattr_size = sctp_nlattr_size,
 	.from_nlattr = nlattr_to_sctp,
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 37ef35b861f2..e97cdc1cf98c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -68,7 +68,7 @@ static const char *const tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
+static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
 	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
 	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
 	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
+	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+		return;
+
 	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
 }
 #endif
@@ -1222,6 +1225,12 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
 	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	= { .len = sizeof(struct nf_ct_tcp_flags) },
 };
 
+#define TCP_NLATTR_SIZE ( \
+	NLA_ALIGN(NLA_HDRLEN + 1) + \
+	NLA_ALIGN(NLA_HDRLEN + 1) + \
+	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
+	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
+
 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 {
 	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
@@ -1274,12 +1283,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 	return 0;
 }
 
-static int tcp_nlattr_size(void)
-{
-	return nla_total_size(0)	/* CTA_PROTOINFO_TCP */
-		+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
-}
-
 static unsigned int tcp_nlattr_tuple_size(void)
 {
 	static unsigned int size __read_mostly;
@@ -1541,7 +1544,7 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net)
 	return &net->ct.nf_ct_proto.tcp.pn;
 }
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 {
 	.l3proto = PF_INET,
 	.l4proto = IPPROTO_TCP,
@@ -1557,11 +1560,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
 	.can_early_drop = tcp_can_early_drop,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr = tcp_to_nlattr,
-	.nlattr_size = tcp_nlattr_size,
 	.from_nlattr = nlattr_to_tcp,
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
 	.nlattr_tuple_size = tcp_nlattr_tuple_size,
+	.nlattr_size = TCP_NLATTR_SIZE,
 	.nla_policy = nf_ct_port_nla_policy,
 #endif
1567#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 1570#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -1579,7 +1582,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1579}; 1582};
1580EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); 1583EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1581 1584
1582struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = 1585const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1583{ 1586{
1584 .l3proto = PF_INET6, 1587 .l3proto = PF_INET6,
1585 .l4proto = IPPROTO_TCP, 1588 .l4proto = IPPROTO_TCP,
@@ -1594,8 +1597,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1594 .error = tcp_error, 1597 .error = tcp_error,
1595 .can_early_drop = tcp_can_early_drop, 1598 .can_early_drop = tcp_can_early_drop,
1596#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1599#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1600 .nlattr_size = TCP_NLATTR_SIZE,
1597 .to_nlattr = tcp_to_nlattr, 1601 .to_nlattr = tcp_to_nlattr,
1598 .nlattr_size = tcp_nlattr_size,
1599 .from_nlattr = nlattr_to_tcp, 1602 .from_nlattr = nlattr_to_tcp,
1600 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1603 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1601 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1604 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3a5f727103af..fe7243970aa4 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -26,7 +26,7 @@
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
-static unsigned int udp_timeouts[UDP_CT_MAX] = {
+static const unsigned int udp_timeouts[UDP_CT_MAX] = {
 	[UDP_CT_UNREPLIED]	= 30*HZ,
 	[UDP_CT_REPLIED]	= 180*HZ,
 };
@@ -296,7 +296,7 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net)
 	return &net->ct.nf_ct_proto.udp.pn;
 }
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
 {
 	.l3proto = PF_INET,
 	.l4proto = IPPROTO_UDP,
@@ -328,7 +328,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
 
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
 {
 	.l3proto = PF_INET,
 	.l4proto = IPPROTO_UDPLITE,
@@ -360,7 +360,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
 #endif
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
 {
 	.l3proto = PF_INET6,
 	.l4proto = IPPROTO_UDP,
@@ -392,7 +392,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
392EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); 392EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
393 393
394#ifdef CONFIG_NF_CT_PROTO_UDPLITE 394#ifdef CONFIG_NF_CT_PROTO_UDPLITE
395struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = 395const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
396{ 396{
397 .l3proto = PF_INET6, 397 .l3proto = PF_INET6,
398 .l4proto = IPPROTO_UDPLITE, 398 .l4proto = IPPROTO_UDPLITE,
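
These hunks drop __read_mostly in favour of const: the l4proto templates are never written after registration, so they can live in .rodata and be write-protected. A minimal sketch of the pattern with an illustrative structure (not a kernel type):

struct example_proto_ops {
	int (*handler)(void);
};

static int example_handler(void) { return 0; }

/* Before: writable object, merely grouped away from frequently
 * written data. */
static struct example_proto_ops ops_rw __read_mostly = {
	.handler = example_handler,
};

/* After: immutable object; the compiler places it in .rodata. */
static const struct example_proto_ops ops_ro = {
	.handler = example_handler,
};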
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5a101caa3e12..9123fdec5e14 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	WARN_ON(!l4proto);
 
 	ret = -ENOSPC;
-	seq_printf(s, "%-8s %u %-8s %u %ld ",
+	seq_printf(s, "%-8s %u %-8s %u ",
 		   l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
-		   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
-		   nf_ct_expires(ct) / HZ);
+		   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
+
+	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
+		seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
 
 	if (l4proto->print_conntrack)
 		l4proto->print_conntrack(s, ct);
@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
 		goto release;
 
-	if (test_bit(IPS_ASSURED_BIT, &ct->status))
+	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+		seq_puts(s, "[OFFLOAD] ");
+	else if (test_bit(IPS_ASSURED_BIT, &ct->status))
 		seq_puts(s, "[ASSURED] ");
 
 	if (seq_has_overflowed(s))
@@ -378,7 +382,6 @@ static int ct_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ct_file_ops = {
-	.owner   = THIS_MODULE,
 	.open    = ct_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
@@ -471,7 +474,6 @@ static int ct_cpu_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ct_cpu_seq_fops = {
-	.owner   = THIS_MODULE,
 	.open    = ct_cpu_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
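
With the change above, an offloaded entry prints no timeout column and is tagged [OFFLOAD] rather than [ASSURED]. Illustrative /proc/net/nf_conntrack lines (addresses and counters are made up, tuple fields abridged):

ipv4     2 tcp      6 431999 ESTABLISHED src=10.0.0.2 dst=10.0.0.9 sport=34216 dport=22 ... [ASSURED] use=1
ipv4     2 tcp      6 ESTABLISHED src=10.0.0.3 dst=10.0.0.9 sport=34712 dport=80 ... [OFFLOAD] use=2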
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
new file mode 100644
index 000000000000..ec410cae9307
--- /dev/null
+++ b/net/netfilter/nf_flow_table.c
@@ -0,0 +1,453 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+struct flow_offload_entry {
+	struct flow_offload	flow;
+	struct nf_conn		*ct;
+	struct rcu_head		rcu_head;
+};
+
+struct flow_offload *
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+{
+	struct flow_offload_entry *entry;
+	struct flow_offload *flow;
+
+	if (unlikely(nf_ct_is_dying(ct) ||
+	    !atomic_inc_not_zero(&ct->ct_general.use)))
+		return NULL;
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		goto err_ct_refcnt;
+
+	flow = &entry->flow;
+
+	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+		goto err_dst_cache_original;
+
+	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+		goto err_dst_cache_reply;
+
+	entry->ct = ct;
+
+	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
+	case NFPROTO_IPV4:
+		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
+		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
+		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
+			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
+		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
+			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
+		break;
+	case NFPROTO_IPV6:
+		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
+		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
+			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
+		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
+			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
+		break;
+	}
+
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
+		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
+		route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
+
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
+		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
+		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
+		FLOW_OFFLOAD_DIR_ORIGINAL;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
+		FLOW_OFFLOAD_DIR_REPLY;
+
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
+		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
+		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+
+	if (ct->status & IPS_SRC_NAT)
+		flow->flags |= FLOW_OFFLOAD_SNAT;
+	else if (ct->status & IPS_DST_NAT)
+		flow->flags |= FLOW_OFFLOAD_DNAT;
+
+	return flow;
+
+err_dst_cache_reply:
+	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+err_dst_cache_original:
+	kfree(entry);
+err_ct_refcnt:
+	nf_ct_put(ct);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+void flow_offload_free(struct flow_offload *flow)
+{
+	struct flow_offload_entry *e;
+
+	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+	e = container_of(flow, struct flow_offload_entry, flow);
+	nf_ct_delete(e->ct, 0, 0);
+	nf_ct_put(e->ct);
+	kfree_rcu(e, rcu_head);
+}
+EXPORT_SYMBOL_GPL(flow_offload_free);
+
+void flow_offload_dead(struct flow_offload *flow)
+{
+	flow->flags |= FLOW_OFFLOAD_DYING;
+}
+EXPORT_SYMBOL_GPL(flow_offload_dead);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+{
+	flow->timeout = (u32)jiffies;
+
+	rhashtable_insert_fast(&flow_table->rhashtable,
+			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+			       *flow_table->type->params);
+	rhashtable_insert_fast(&flow_table->rhashtable,
+			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+			       *flow_table->type->params);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_add);
+
+static void flow_offload_del(struct nf_flowtable *flow_table,
+			     struct flow_offload *flow)
+{
+	rhashtable_remove_fast(&flow_table->rhashtable,
+			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+			       *flow_table->type->params);
+	rhashtable_remove_fast(&flow_table->rhashtable,
+			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+			       *flow_table->type->params);
+
+	flow_offload_free(flow);
+}
+
+struct flow_offload_tuple_rhash *
+flow_offload_lookup(struct nf_flowtable *flow_table,
+		    struct flow_offload_tuple *tuple)
+{
+	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+				      *flow_table->type->params);
+}
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+			  void (*iter)(struct flow_offload *flow, void *data),
+			  void *data)
+{
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct rhashtable_iter hti;
+	struct flow_offload *flow;
+	int err;
+
+	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+	if (err)
+		return err;
+
+	rhashtable_walk_start(&hti);
+
+	while ((tuplehash = rhashtable_walk_next(&hti))) {
+		if (IS_ERR(tuplehash)) {
+			err = PTR_ERR(tuplehash);
+			if (err != -EAGAIN)
+				goto out;
+
+			continue;
+		}
+		if (tuplehash->tuple.dir)
+			continue;
+
+		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+		iter(flow, data);
+	}
+out:
+	rhashtable_walk_stop(&hti);
+	rhashtable_walk_exit(&hti);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+}
+
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+{
+	return flow->flags & FLOW_OFFLOAD_DYING;
+}
+
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+{
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct rhashtable_iter hti;
+	struct flow_offload *flow;
+	int err;
+
+	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+	if (err)
+		return 0;
+
+	rhashtable_walk_start(&hti);
+
+	while ((tuplehash = rhashtable_walk_next(&hti))) {
+		if (IS_ERR(tuplehash)) {
+			err = PTR_ERR(tuplehash);
+			if (err != -EAGAIN)
+				goto out;
+
+			continue;
+		}
+		if (tuplehash->tuple.dir)
+			continue;
+
+		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+		if (nf_flow_has_expired(flow) ||
+		    nf_flow_is_dying(flow))
+			flow_offload_del(flow_table, flow);
+	}
+out:
+	rhashtable_walk_stop(&hti);
+	rhashtable_walk_exit(&hti);
+
+	return 1;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+	struct nf_flowtable *flow_table;
+
+	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+	nf_flow_offload_gc_step(flow_table);
+	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+	const struct flow_offload_tuple *tuple = data;
+
+	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+	const struct flow_offload_tuple_rhash *tuplehash = data;
+
+	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+				 const void *ptr)
+{
+	const struct flow_offload_tuple *tuple = arg->key;
+	const struct flow_offload_tuple_rhash *x = ptr;
+
+	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+		return 1;
+
+	return 0;
+}
+
+const struct rhashtable_params nf_flow_offload_rhash_params = {
+	.head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+	.hashfn = flow_offload_hash,
+	.obj_hashfn = flow_offload_hash_obj,
+	.obj_cmpfn = flow_offload_hash_cmp,
+	.automatic_shrinking = true,
+};
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+				__be16 port, __be16 new_port)
+{
+	struct tcphdr *tcph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+		return -1;
+
+	tcph = (void *)(skb_network_header(skb) + thoff);
+	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+
+	return 0;
+}
+
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+				__be16 port, __be16 new_port)
+{
+	struct udphdr *udph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+		return -1;
+
+	udph = (void *)(skb_network_header(skb) + thoff);
+	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		inet_proto_csum_replace2(&udph->check, skb, port,
+					 new_port, true);
+		if (!udph->check)
+			udph->check = CSUM_MANGLED_0;
+	}
+
+	return 0;
+}
+
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+			    u8 protocol, __be16 port, __be16 new_port)
+{
+	switch (protocol) {
+	case IPPROTO_TCP:
+		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+			return NF_DROP;
+		break;
+	case IPPROTO_UDP:
+		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+			return NF_DROP;
+		break;
+	}
+
+	return 0;
+}
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+		      struct sk_buff *skb, unsigned int thoff,
+		      u8 protocol, enum flow_offload_tuple_dir dir)
+{
+	struct flow_ports *hdr;
+	__be16 port, new_port;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+		return -1;
+
+	hdr = (void *)(skb_network_header(skb) + thoff);
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		port = hdr->source;
+		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+		hdr->source = new_port;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		port = hdr->dest;
+		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+		hdr->dest = new_port;
+		break;
+	default:
+		return -1;
+	}
+
+	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+int nf_flow_dnat_port(const struct flow_offload *flow,
+		      struct sk_buff *skb, unsigned int thoff,
+		      u8 protocol, enum flow_offload_tuple_dir dir)
+{
+	struct flow_ports *hdr;
+	__be16 port, new_port;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+		return -1;
+
+	hdr = (void *)(skb_network_header(skb) + thoff);
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		port = hdr->dest;
+		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+		hdr->dest = new_port;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		port = hdr->source;
+		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+		hdr->source = new_port;
+		break;
+	default:
+		return -1;
+	}
+
+	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+{
+	struct net_device *dev = data;
+
+	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+		return;
+
+	flow_offload_dead(flow);
+}
+
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+					  void *data)
+{
+	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+	flush_delayed_work(&flowtable->gc_work);
+}
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+{
+	nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
+void nf_flow_table_free(struct nf_flowtable *flow_table)
+{
+	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+	WARN_ON(!nf_flow_offload_gc_step(flow_table));
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
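
A hedged usage sketch for the API exported above (not part of the commit): a caller that already holds a confirmed conntrack entry plus a dst/ifindex pair per direction allocates a flow and inserts both tuple directions. The function name is hypothetical; the flow_offload_* calls are the ones defined in this file:

static int example_offload_flow(struct nf_flowtable *ft, struct nf_conn *ct,
				struct nf_flow_route *route)
{
	struct flow_offload *flow;

	flow = flow_offload_alloc(ct, route);	/* takes ct and dst references */
	if (!flow)
		return -ENOMEM;

	/* Inserts the ORIGINAL and REPLY tuples into the rhashtable; the
	 * periodic gc work reaps the flow once it expires or is marked
	 * dead via flow_offload_dead(). */
	return flow_offload_add(ft, flow);
}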
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
new file mode 100644
index 000000000000..375a1881d93d
--- /dev/null
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -0,0 +1,49 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+			  const struct nf_hook_state *state)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		return nf_flow_offload_ip_hook(priv, skb, state);
+	case htons(ETH_P_IPV6):
+		return nf_flow_offload_ipv6_hook(priv, skb, state);
+	}
+
+	return NF_ACCEPT;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+	.family = NFPROTO_INET,
+	.params = &nf_flow_offload_rhash_params,
+	.gc = nf_flow_offload_work_gc,
+	.free = nf_flow_table_free,
+	.hook = nf_flow_offload_inet_hook,
+	.owner = THIS_MODULE,
+};
+
+static int __init nf_flow_inet_module_init(void)
+{
+	nft_register_flowtable_type(&flowtable_inet);
+
+	return 0;
+}
+
+static void __exit nf_flow_inet_module_exit(void)
+{
+	nft_unregister_flowtable_type(&flowtable_inet);
+}
+
+module_init(nf_flow_inet_module_init);
+module_exit(nf_flow_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
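
For orientation, the NFPROTO_INET flowtable type registered above is what an nftables ruleset binds to. A rough example in nft syntax of the same era (assumed syntax, shown for illustration only; device names are placeholders):

table inet filter {
	flowtable ft {
		hook ingress priority 0
		devices = { eth0, eth1 }
	}
	chain forward {
		type filter hook forward priority 0; policy accept;
		ip protocol tcp flow offload @ft
	}
}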
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 44284cd2528d..18f6d7ae995b 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -10,7 +10,7 @@
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	     const struct nf_hook_entries *entries, unsigned int index,
 	     unsigned int verdict);
-unsigned int nf_queue_nf_hook_drop(struct net *net);
+void nf_queue_nf_hook_drop(struct net *net);
 
 /* nf_log.c */
 int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8bb152a7cca4..c2c1b16b7538 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -402,7 +402,6 @@ static int nflog_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations nflog_file_ops = {
-	.owner	 = THIS_MODULE,
 	.open	 = nflog_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f7e21953b1de..d67a96a25a68 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,8 @@
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
@@ -96,30 +98,56 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
-unsigned int nf_queue_nf_hook_drop(struct net *net)
+void nf_queue_nf_hook_drop(struct net *net)
 {
 	const struct nf_queue_handler *qh;
-	unsigned int count = 0;
 
 	rcu_read_lock();
 	qh = rcu_dereference(net->nf.queue_handler);
 	if (qh)
-		count = qh->nf_hook_drop(net);
+		qh->nf_hook_drop(net);
 	rcu_read_unlock();
-
-	return count;
 }
 EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
 
+static void nf_ip_saveroute(const struct sk_buff *skb,
+			    struct nf_queue_entry *entry)
+{
+	struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		rt_info->tos = iph->tos;
+		rt_info->daddr = iph->daddr;
+		rt_info->saddr = iph->saddr;
+		rt_info->mark = skb->mark;
+	}
+}
+
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+			     struct nf_queue_entry *entry)
+{
+	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		rt_info->daddr = iph->daddr;
+		rt_info->saddr = iph->saddr;
+		rt_info->mark = skb->mark;
+	}
+}
+
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 		      const struct nf_hook_entries *entries,
 		      unsigned int index, unsigned int queuenum)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
-	const struct nf_afinfo *afinfo;
 	const struct nf_queue_handler *qh;
 	struct net *net = state->net;
+	unsigned int route_key_size;
 
 	/* QUEUE == DROP if no one is waiting, to be safe. */
 	qh = rcu_dereference(net->nf.queue_handler);
@@ -128,11 +156,19 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 		goto err;
 	}
 
-	afinfo = nf_get_afinfo(state->pf);
-	if (!afinfo)
-		goto err;
+	switch (state->pf) {
+	case AF_INET:
+		route_key_size = sizeof(struct ip_rt_info);
+		break;
+	case AF_INET6:
+		route_key_size = sizeof(struct ip6_rt_info);
+		break;
+	default:
+		route_key_size = 0;
+		break;
+	}
 
-	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+	entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
 	if (!entry) {
 		status = -ENOMEM;
 		goto err;
@@ -142,12 +178,21 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 		.skb	= skb,
 		.state	= *state,
 		.hook_index = index,
-		.size	= sizeof(*entry) + afinfo->route_key_size,
+		.size	= sizeof(*entry) + route_key_size,
 	};
 
 	nf_queue_entry_get_refs(entry);
 	skb_dst_force(skb);
-	afinfo->saveroute(skb, entry);
+
+	switch (entry->state.pf) {
+	case AF_INET:
+		nf_ip_saveroute(skb, entry);
+		break;
+	case AF_INET6:
+		nf_ip6_saveroute(skb, entry);
+		break;
+	}
+
 	status = qh->outfn(entry, queuenum);
 
 	if (status < 0) {
@@ -204,13 +249,31 @@ repeat:
 	return NF_ACCEPT;
 }
 
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+{
+	switch (pf) {
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+	case NFPROTO_BRIDGE:
+		return rcu_dereference(net->nf.hooks_bridge[hooknum]);
+#endif
+	case NFPROTO_IPV4:
+		return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+	case NFPROTO_IPV6:
+		return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
+	default:
+		WARN_ON_ONCE(1);
+		return NULL;
+	}
+
+	return NULL;
+}
+
 /* Caller must hold rcu read-side lock */
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
 	const struct nf_hook_entry *hook_entry;
 	const struct nf_hook_entries *hooks;
 	struct sk_buff *skb = entry->skb;
-	const struct nf_afinfo *afinfo;
 	const struct net *net;
 	unsigned int i;
 	int err;
@@ -219,12 +282,12 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	net = entry->state.net;
 	pf = entry->state.pf;
 
-	hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
+	hooks = nf_hook_entries_head(net, pf, entry->state.hook);
 
 	nf_queue_entry_release_refs(entry);
 
 	i = entry->hook_index;
-	if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+	if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
 		kfree_skb(skb);
 		kfree(entry);
 		return;
@@ -237,8 +300,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
 
 	if (verdict == NF_ACCEPT) {
-		afinfo = nf_get_afinfo(entry->state.pf);
-		if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
+		if (nf_reroute(skb, entry) < 0)
 			verdict = NF_DROP;
 	}
 
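
After this change a queue handler's nf_hook_drop callback returns void, matching the updated nf_queue_nf_hook_drop() above. A minimal hedged sketch of the handler contract (names hypothetical):

static int example_outfn(struct nf_queue_entry *entry, unsigned int queuenum)
{
	nf_reinject(entry, NF_ACCEPT);	/* hand the packet straight back */
	return 0;
}

static void example_hook_drop(struct net *net)
{
	/* discard queued entries that reference hooks being removed */
}

static const struct nf_queue_handler example_qh = {
	.outfn		= example_outfn,
	.nf_hook_drop	= example_hook_drop,
};

Registration would go through nf_register_queue_handler(net, &example_qh), as nfnetlink_queue does.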
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 49bd8bb16b18..92139a087260 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -317,7 +317,6 @@ static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations synproxy_cpu_seq_fops = {
-	.owner		= THIS_MODULE,
 	.open		= synproxy_cpu_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 07bd4138c84e..8b9fe30de0cd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -17,6 +17,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/net_namespace.h>
@@ -24,86 +25,20 @@
 
 static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
-
-/**
- * nft_register_afinfo - register nf_tables address family info
- *
- * @afi: address family info to register
- *
- * Register the address family for use with nf_tables. Returns zero on
- * success or a negative errno code otherwise.
- */
-int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
-{
-	INIT_LIST_HEAD(&afi->tables);
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_add_tail_rcu(&afi->list, &net->nft.af_info);
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nft_register_afinfo);
-
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
-
-/**
- * nft_unregister_afinfo - unregister nf_tables address family info
- *
- * @afi: address family info to unregister
- *
- * Unregister the address family for use with nf_tables.
- */
-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
-{
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	__nft_release_afinfo(net, afi);
-	list_del_rcu(&afi->list);
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-}
-EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
-
-static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
-{
-	struct nft_af_info *afi;
-
-	list_for_each_entry(afi, &net->nft.af_info, list) {
-		if (afi->family == family)
-			return afi;
-	}
-	return NULL;
-}
-
-static struct nft_af_info *
-nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
-{
-	struct nft_af_info *afi;
-
-	afi = nft_afinfo_lookup(net, family);
-	if (afi != NULL)
-		return afi;
-#ifdef CONFIG_MODULES
-	if (autoload) {
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-afinfo-%u", family);
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
-		afi = nft_afinfo_lookup(net, family);
-		if (afi != NULL)
-			return ERR_PTR(-EAGAIN);
-	}
-#endif
-	return ERR_PTR(-EAFNOSUPPORT);
-}
+static LIST_HEAD(nf_tables_flowtables);
+static u64 table_handle;
 
 static void nft_ctx_init(struct nft_ctx *ctx,
 			 struct net *net,
 			 const struct sk_buff *skb,
 			 const struct nlmsghdr *nlh,
-			 struct nft_af_info *afi,
+			 u8 family,
 			 struct nft_table *table,
 			 struct nft_chain *chain,
 			 const struct nlattr * const *nla)
 {
 	ctx->net = net;
-	ctx->afi = afi;
+	ctx->family = family;
 	ctx->table = table;
 	ctx->chain = chain;
 	ctx->nla = nla;
@@ -139,29 +74,26 @@ static void nft_trans_destroy(struct nft_trans *trans)
 	kfree(trans);
 }
 
-static int nf_tables_register_hooks(struct net *net,
-				    const struct nft_table *table,
-				    struct nft_chain *chain,
-				    unsigned int hook_nops)
+static int nf_tables_register_hook(struct net *net,
+				   const struct nft_table *table,
+				   struct nft_chain *chain)
 {
 	if (table->flags & NFT_TABLE_F_DORMANT ||
 	    !nft_is_base_chain(chain))
 		return 0;
 
-	return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
-				     hook_nops);
+	return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
 }
 
-static void nf_tables_unregister_hooks(struct net *net,
-				       const struct nft_table *table,
-				       struct nft_chain *chain,
-				       unsigned int hook_nops)
+static void nf_tables_unregister_hook(struct net *net,
+				      const struct nft_table *table,
+				      struct nft_chain *chain)
 {
 	if (table->flags & NFT_TABLE_F_DORMANT ||
 	    !nft_is_base_chain(chain))
 		return;
 
-	nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
+	nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
 }
 
 static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
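
Base chains now carry a single embedded nf_hook_ops instead of a per-hook array, so registration collapses to nf_register_net_hook(). A generic sketch of the single-ops pattern (the hook function and placement are illustrative, not from this commit):

static unsigned int example_hookfn(void *priv, struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	return NF_ACCEPT;	/* pass everything through */
}

static const struct nf_hook_ops example_ops = {
	.hook		= example_hookfn,
	.pf		= NFPROTO_IPV4,
	.hooknum	= NF_INET_FORWARD,
	.priority	= NF_IP_PRI_FILTER,
};

/* err = nf_register_net_hook(net, &example_ops);
 * ...
 * nf_unregister_net_hook(net, &example_ops); */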
@@ -348,34 +280,99 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
 	return err;
 }
 
+static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+				   struct nft_flowtable *flowtable)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type,
+				sizeof(struct nft_trans_flowtable));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWFLOWTABLE)
+		nft_activate_next(ctx->net, flowtable);
+
+	nft_trans_flowtable(trans) = flowtable;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+}
+
+static int nft_delflowtable(struct nft_ctx *ctx,
+			    struct nft_flowtable *flowtable)
+{
+	int err;
+
+	err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+	if (err < 0)
+		return err;
+
+	nft_deactivate_next(ctx->net, flowtable);
+	ctx->table->use--;
+
+	return err;
+}
+
 /*
  * Tables
  */
 
-static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+static struct nft_table *nft_table_lookup(const struct net *net,
 					  const struct nlattr *nla,
-					  u8 genmask)
+					  u8 family, u8 genmask)
 {
 	struct nft_table *table;
 
-	list_for_each_entry(table, &afi->tables, list) {
+	list_for_each_entry(table, &net->nft.tables, list) {
 		if (!nla_strcmp(nla, table->name) &&
+		    table->family == family &&
+		    nft_active_genmask(table, genmask))
+			return table;
+	}
+	return NULL;
+}
+
+static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
+						   const struct nlattr *nla,
+						   u8 genmask)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &net->nft.tables, list) {
+		if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
 		    nft_active_genmask(table, genmask))
 			return table;
 	}
 	return NULL;
 }
 
-static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+static struct nft_table *nf_tables_table_lookup(const struct net *net,
 						const struct nlattr *nla,
-						u8 genmask)
+						u8 family, u8 genmask)
+{
+	struct nft_table *table;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	table = nft_table_lookup(net, nla, family, genmask);
+	if (table != NULL)
+		return table;
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
+							 const struct nlattr *nla,
+							 u8 genmask)
 {
 	struct nft_table *table;
 
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
-	table = nft_table_lookup(afi, nla, genmask);
+	table = nft_table_lookup_byhandle(net, nla, genmask);
 	if (table != NULL)
 		return table;
 
@@ -390,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
 static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
 
 static const struct nf_chain_type *
-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
 {
 	int i;
 
@@ -403,22 +400,20 @@ __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
 }
 
 static const struct nf_chain_type *
-nf_tables_chain_type_lookup(const struct nft_af_info *afi,
-			    const struct nlattr *nla,
-			    bool autoload)
+nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
 {
 	const struct nf_chain_type *type;
 
-	type = __nf_tables_chain_type_lookup(afi->family, nla);
+	type = __nf_tables_chain_type_lookup(nla, family);
 	if (type != NULL)
 		return type;
 #ifdef CONFIG_MODULES
 	if (autoload) {
 		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-chain-%u-%.*s", afi->family,
+		request_module("nft-chain-%u-%.*s", family,
 			       nla_len(nla), (const char *)nla_data(nla));
 		nfnl_lock(NFNL_SUBSYS_NFTABLES);
-		type = __nf_tables_chain_type_lookup(afi->family, nla);
+		type = __nf_tables_chain_type_lookup(nla, family);
 		if (type != NULL)
 			return ERR_PTR(-EAGAIN);
 	}
@@ -430,6 +425,7 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
 	[NFTA_TABLE_NAME] = { .type = NLA_STRING,
 			      .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+	[NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
 };
 
 static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -451,7 +447,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 
 	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
 	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
-	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
+	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
+	    nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
+			 NFTA_TABLE_PAD))
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
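
The new NFTA_TABLE_HANDLE attribute is 64 bits wide, and 64-bit netlink attributes must be accompanied by a pad attribute type so userspace can rely on aligned reads; that is why nla_put_be64() takes NFTA_TABLE_PAD above. A hedged sketch with hypothetical attribute numbers:

/* MY_ATTR/MY_PAD are hypothetical; the pad type lets the kernel insert
 * a zero-length filler attribute whenever 8-byte alignment requires it. */
if (nla_put_be64(skb, MY_ATTR, cpu_to_be64(handle), MY_PAD))
	goto nla_put_failure;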
@@ -476,7 +474,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 
 	err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
-					event, 0, ctx->afi->family, ctx->table);
+					event, 0, ctx->family, ctx->table);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
@@ -493,7 +491,6 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
 				 struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
@@ -502,30 +499,27 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
 	rcu_read_lock();
 	cb->seq = net->nft.base_seq;
 
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (family != NFPROTO_UNSPEC && family != afi->family)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			if (idx < s_idx)
-				goto cont;
-			if (idx > s_idx)
-				memset(&cb->args[1], 0,
-				       sizeof(cb->args) - sizeof(cb->args[0]));
-			if (!nft_is_active(net, table))
-				continue;
-			if (nf_tables_fill_table_info(skb, net,
-						      NETLINK_CB(cb->skb).portid,
-						      cb->nlh->nlmsg_seq,
-						      NFT_MSG_NEWTABLE,
-						      NLM_F_MULTI,
-						      afi->family, table) < 0)
-				goto done;
-
-			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+		if (idx < s_idx)
+			goto cont;
+		if (idx > s_idx)
+			memset(&cb->args[1], 0,
+			       sizeof(cb->args) - sizeof(cb->args[0]));
+		if (!nft_is_active(net, table))
+			continue;
+		if (nf_tables_fill_table_info(skb, net,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      NFT_MSG_NEWTABLE, NLM_F_MULTI,
+					      table->family, table) < 0)
+			goto done;
+
+		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
-			idx++;
-		}
+		idx++;
 	}
 done:
 	rcu_read_unlock();
@@ -540,7 +534,6 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	struct sk_buff *skb2;
 	int family = nfmsg->nfgen_family;
@@ -553,11 +546,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -578,10 +568,7 @@ err:
 	return err;
 }
 
-static void _nf_tables_table_disable(struct net *net,
-				     const struct nft_af_info *afi,
-				     struct nft_table *table,
-				     u32 cnt)
+static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
 {
 	struct nft_chain *chain;
 	u32 i = 0;
@@ -595,14 +582,11 @@ static void _nf_tables_table_disable(struct net *net,
 		if (cnt && i++ == cnt)
 			break;
 
-		nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
-					afi->nops);
+		nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
 	}
 }
 
-static int nf_tables_table_enable(struct net *net,
-				  const struct nft_af_info *afi,
-				  struct nft_table *table)
+static int nf_tables_table_enable(struct net *net, struct nft_table *table)
 {
 	struct nft_chain *chain;
 	int err, i = 0;
@@ -613,8 +597,7 @@ static int nf_tables_table_enable(struct net *net,
 		if (!nft_is_base_chain(chain))
 			continue;
 
-		err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
-					    afi->nops);
+		err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
 		if (err < 0)
 			goto err;
 
@@ -623,15 +606,13 @@ static int nf_tables_table_enable(struct net *net,
 	return 0;
 err:
 	if (i)
-		_nf_tables_table_disable(net, afi, table, i);
+		nft_table_disable(net, table, i);
 	return err;
 }
 
-static void nf_tables_table_disable(struct net *net,
-				    const struct nft_af_info *afi,
-				    struct nft_table *table)
+static void nf_tables_table_disable(struct net *net, struct nft_table *table)
 {
-	_nf_tables_table_disable(net, afi, table, 0);
+	nft_table_disable(net, table, 0);
 }
 
 static int nf_tables_updtable(struct nft_ctx *ctx)
@@ -660,7 +641,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 		nft_trans_table_enable(trans) = false;
 	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
 		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
-		ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
+		ret = nf_tables_table_enable(ctx->net, ctx->table);
 		if (ret >= 0) {
 			ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
 			nft_trans_table_enable(trans) = true;
@@ -685,19 +666,14 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *name;
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	int family = nfmsg->nfgen_family;
 	u32 flags = 0;
 	struct nft_ctx ctx;
 	int err;
 
-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
 	name = nla[NFTA_TABLE_NAME];
-	table = nf_tables_table_lookup(afi, name, genmask);
+	table = nf_tables_table_lookup(net, name, family, genmask);
 	if (IS_ERR(table)) {
 		if (PTR_ERR(table) != -ENOENT)
 			return PTR_ERR(table);
@@ -707,7 +683,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
-		nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 		return nf_tables_updtable(&ctx);
 	}
 
@@ -717,47 +693,45 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 		return -EINVAL;
 	}
 
-	err = -EAFNOSUPPORT;
-	if (!try_module_get(afi->owner))
-		goto err1;
-
 	err = -ENOMEM;
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (table == NULL)
-		goto err2;
+		goto err_kzalloc;
 
 	table->name = nla_strdup(name, GFP_KERNEL);
 	if (table->name == NULL)
-		goto err3;
+		goto err_strdup;
 
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	INIT_LIST_HEAD(&table->objects);
+	INIT_LIST_HEAD(&table->flowtables);
+	table->family = family;
 	table->flags = flags;
+	table->handle = ++table_handle;
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
 	if (err < 0)
-		goto err4;
+		goto err_trans;
 
-	list_add_tail_rcu(&table->list, &afi->tables);
+	list_add_tail_rcu(&table->list, &net->nft.tables);
 	return 0;
-err4:
+err_trans:
 	kfree(table->name);
-err3:
+err_strdup:
 	kfree(table);
-err2:
-	module_put(afi->owner);
-err1:
+err_kzalloc:
 	return err;
 }
 
 static int nft_flush_table(struct nft_ctx *ctx)
 {
-	int err;
+	struct nft_flowtable *flowtable, *nft;
 	struct nft_chain *chain, *nc;
 	struct nft_object *obj, *ne;
 	struct nft_set *set, *ns;
+	int err;
 
 	list_for_each_entry(chain, &ctx->table->chains, list) {
 		if (!nft_is_active_next(ctx->net, chain))
@@ -774,7 +748,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
 		if (!nft_is_active_next(ctx->net, set))
 			continue;
 
-		if (set->flags & NFT_SET_ANONYMOUS &&
+		if (nft_set_is_anonymous(set) &&
 		    !list_empty(&set->bindings))
 			continue;
 
@@ -783,6 +757,12 @@ static int nft_flush_table(struct nft_ctx *ctx)
 		goto out;
 	}
 
+	list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
+		err = nft_delflowtable(ctx, flowtable);
+		if (err < 0)
+			goto out;
+	}
+
 	list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
 		err = nft_delobj(ctx, obj);
 		if (err < 0)
@@ -807,30 +787,28 @@ out:
 
 static int nft_flush(struct nft_ctx *ctx, int family)
 {
-	struct nft_af_info *afi;
 	struct nft_table *table, *nt;
 	const struct nlattr * const *nla = ctx->nla;
 	int err = 0;
 
-	list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
-		if (family != AF_UNSPEC && afi->family != family)
+	list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
+		if (family != AF_UNSPEC && table->family != family)
 			continue;
 
-		ctx->afi = afi;
-		list_for_each_entry_safe(table, nt, &afi->tables, list) {
-			if (!nft_is_active_next(ctx->net, table))
-				continue;
+		ctx->family = table->family;
 
-			if (nla[NFTA_TABLE_NAME] &&
-			    nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
-				continue;
+		if (!nft_is_active_next(ctx->net, table))
+			continue;
 
-			ctx->table = table;
+		if (nla[NFTA_TABLE_NAME] &&
+		    nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+			continue;
 
-			err = nft_flush_table(ctx);
-			if (err < 0)
-				goto out;
-		}
+		ctx->table = table;
+
+		err = nft_flush_table(ctx);
+		if (err < 0)
+			goto out;
 	}
 out:
 	return err;
@@ -843,20 +821,23 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	int family = nfmsg->nfgen_family;
 	struct nft_ctx ctx;
 
-	nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
-	if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+	nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+	if (family == AF_UNSPEC ||
+	    (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
 		return nft_flush(&ctx, family);
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
+	if (nla[NFTA_TABLE_HANDLE])
+		table = nf_tables_table_lookup_byhandle(net,
+							nla[NFTA_TABLE_HANDLE],
+							genmask);
+	else
+		table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
+					       family, genmask);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -864,7 +845,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 	    table->use > 0)
 		return -EBUSY;
 
-	ctx.afi = afi;
+	ctx.family = family;
 	ctx.table = table;
 
 	return nft_flush_table(&ctx);
@@ -876,7 +857,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 
 	kfree(ctx->table->name);
 	kfree(ctx->table);
-	module_put(ctx->afi->owner);
 }
 
 int nft_register_chain_type(const struct nf_chain_type *ctype)
@@ -1026,7 +1006,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 
 	if (nft_is_base_chain(chain)) {
 		const struct nft_base_chain *basechain = nft_base_chain(chain);
-		const struct nf_hook_ops *ops = &basechain->ops[0];
+		const struct nf_hook_ops *ops = &basechain->ops;
 		struct nlattr *nest;
 
 		nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
@@ -1077,7 +1057,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 
 	err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
-					event, 0, ctx->afi->family, ctx->table,
+					event, 0, ctx->family, ctx->table,
 					ctx->chain);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -1095,7 +1075,6 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
 				 struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	const struct nft_chain *chain;
 	unsigned int idx = 0, s_idx = cb->args[0];
@@ -1105,31 +1084,30 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
 	rcu_read_lock();
 	cb->seq = net->nft.base_seq;
 
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (family != NFPROTO_UNSPEC && family != afi->family)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			list_for_each_entry_rcu(chain, &table->chains, list) {
-				if (idx < s_idx)
-					goto cont;
-				if (idx > s_idx)
-					memset(&cb->args[1], 0,
-					       sizeof(cb->args) - sizeof(cb->args[0]));
-				if (!nft_is_active(net, chain))
-					continue;
-				if (nf_tables_fill_chain_info(skb, net,
-							      NETLINK_CB(cb->skb).portid,
-							      cb->nlh->nlmsg_seq,
-							      NFT_MSG_NEWCHAIN,
-							      NLM_F_MULTI,
-							      afi->family, table, chain) < 0)
-					goto done;
-
-				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+		list_for_each_entry_rcu(chain, &table->chains, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (!nft_is_active(net, chain))
+				continue;
+			if (nf_tables_fill_chain_info(skb, net,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NFT_MSG_NEWCHAIN,
+						      NLM_F_MULTI,
+						      table->family, table,
+						      chain) < 0)
+				goto done;
+
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
-				idx++;
-			}
+			idx++;
 		}
 	}
 done:
@@ -1145,7 +1123,6 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	const struct nft_chain *chain;
 	struct sk_buff *skb2;
@@ -1159,11 +1136,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -1227,13 +1201,13 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
1227static void nft_chain_stats_replace(struct nft_base_chain *chain, 1201static void nft_chain_stats_replace(struct nft_base_chain *chain,
1228 struct nft_stats __percpu *newstats) 1202 struct nft_stats __percpu *newstats)
1229{ 1203{
1204 struct nft_stats __percpu *oldstats;
1205
1230 if (newstats == NULL) 1206 if (newstats == NULL)
1231 return; 1207 return;
1232 1208
1233 if (chain->stats) { 1209 if (chain->stats) {
1234 struct nft_stats __percpu *oldstats = 1210 oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
1235 nft_dereference(chain->stats);
1236
1237 rcu_assign_pointer(chain->stats, newstats); 1211 rcu_assign_pointer(chain->stats, newstats);
1238 synchronize_rcu(); 1212 synchronize_rcu();
1239 free_percpu(oldstats); 1213 free_percpu(oldstats);
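A minimal userspace sketch of the replace-then-reclaim pattern above, using C11 atomics in place of rcu_assign_pointer() and a stub in place of synchronize_rcu(); all names are illustrative.

#include <stdatomic.h>
#include <stdlib.h>

struct stats { unsigned long pkts, bytes; };

static _Atomic(struct stats *) chain_stats;

static void wait_for_readers(void) { /* synchronize_rcu() equivalent */ }

static void stats_replace(struct stats *newstats)
{
	struct stats *old;

	if (!newstats)
		return;

	/* atomic_exchange models the publish step: readers observe either
	 * the old or the new block, never a half-written pointer. */
	old = atomic_exchange(&chain_stats, newstats);
	wait_for_readers();	/* grace period before reclaiming */
	free(old);
}

int main(void)
{
	stats_replace(calloc(1, sizeof(struct stats)));
	free(atomic_load(&chain_stats));
	return 0;
}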
@@ -1252,8 +1226,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 		free_percpu(basechain->stats);
 		if (basechain->stats)
 			static_branch_dec(&nft_counters_enabled);
-		if (basechain->ops[0].dev != NULL)
-			dev_put(basechain->ops[0].dev);
+		if (basechain->ops.dev != NULL)
+			dev_put(basechain->ops.dev);
 		kfree(chain->name);
 		kfree(basechain);
 	} else {
@@ -1264,15 +1238,15 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 
 struct nft_chain_hook {
 	u32				num;
-	u32				priority;
+	s32				priority;
 	const struct nf_chain_type	*type;
 	struct net_device		*dev;
 };
 
 static int nft_chain_parse_hook(struct net *net,
 				const struct nlattr * const nla[],
-				struct nft_af_info *afi,
-				struct nft_chain_hook *hook, bool create)
+				struct nft_chain_hook *hook, u8 family,
+				bool create)
 {
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
 	const struct nf_chain_type *type;
@@ -1289,27 +1263,29 @@ static int nft_chain_parse_hook(struct net *net,
 		return -EINVAL;
 
 	hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
-	if (hook->num >= afi->nhooks)
-		return -EINVAL;
-
 	hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
 
-	type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+	type = chain_type[family][NFT_CHAIN_T_DEFAULT];
 	if (nla[NFTA_CHAIN_TYPE]) {
-		type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
-						   create);
+		type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
						   family, create);
 		if (IS_ERR(type))
 			return PTR_ERR(type);
 	}
 	if (!(type->hook_mask & (1 << hook->num)))
 		return -EOPNOTSUPP;
+
+	if (type->type == NFT_CHAIN_T_NAT &&
+	    hook->priority <= NF_IP_PRI_CONNTRACK)
+		return -EOPNOTSUPP;
+
 	if (!try_module_get(type->owner))
 		return -ENOENT;
 
 	hook->type = type;
 
 	hook->dev = NULL;
-	if (afi->flags & NFT_AF_NEEDS_DEV) {
+	if (family == NFPROTO_NETDEV) {
 		char ifname[IFNAMSIZ];
 
 		if (!ha[NFTA_HOOK_DEV]) {
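A hedged sketch of the new NAT-priority check added above: a NAT base chain must hook in strictly after connection tracking, so registration rejects priorities at or before the conntrack priority. The constant mirrors the kernel's NF_IP_PRI_CONNTRACK, but the types are local to the example.

#include <errno.h>
#include <stdio.h>

#define NF_IP_PRI_CONNTRACK	(-200)
enum chain_kind { CHAIN_T_DEFAULT, CHAIN_T_NAT };

static int check_nat_priority(enum chain_kind type, int priority)
{
	if (type == CHAIN_T_NAT && priority <= NF_IP_PRI_CONNTRACK)
		return -EOPNOTSUPP;	/* would run before conntrack */
	return 0;
}

int main(void)
{
	printf("%d\n", check_nat_priority(CHAIN_T_NAT, -300));	/* rejected */
	printf("%d\n", check_nat_priority(CHAIN_T_NAT, 100));	/* accepted */
	return 0;
}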
@@ -1344,12 +1320,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 {
 	const struct nlattr * const *nla = ctx->nla;
 	struct nft_table *table = ctx->table;
-	struct nft_af_info *afi = ctx->afi;
 	struct nft_base_chain *basechain;
 	struct nft_stats __percpu *stats;
 	struct net *net = ctx->net;
 	struct nft_chain *chain;
-	unsigned int i;
 	int err;
 
 	if (table->use == UINT_MAX)
@@ -1358,9 +1332,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	if (nla[NFTA_CHAIN_HOOK]) {
 		struct nft_chain_hook hook;
 		struct nf_hook_ops *ops;
-		nf_hookfn *hookfn;
 
-		err = nft_chain_parse_hook(net, nla, afi, &hook, create);
+		err = nft_chain_parse_hook(net, nla, &hook, family, create);
 		if (err < 0)
 			return err;
 
@@ -1384,23 +1357,19 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 			static_branch_inc(&nft_counters_enabled);
 		}
 
-		hookfn = hook.type->hooks[hook.num];
 		basechain->type = hook.type;
 		chain = &basechain->chain;
 
-		for (i = 0; i < afi->nops; i++) {
-			ops = &basechain->ops[i];
-			ops->pf = family;
-			ops->hooknum = hook.num;
-			ops->priority = hook.priority;
-			ops->priv = chain;
-			ops->hook = afi->hooks[ops->hooknum];
-			ops->dev = hook.dev;
-			if (hookfn)
-				ops->hook = hookfn;
-			if (afi->hook_ops_init)
-				afi->hook_ops_init(ops, i);
-		}
+		ops = &basechain->ops;
+		ops->pf = family;
+		ops->hooknum = hook.num;
+		ops->priority = hook.priority;
+		ops->priv = chain;
+		ops->hook = hook.type->hooks[ops->hooknum];
+		ops->dev = hook.dev;
+
+		if (basechain->type->type == NFT_CHAIN_T_NAT)
+			ops->nat_hook = true;
 
 		chain->flags |= NFT_BASE_CHAIN;
 		basechain->policy = policy;
@@ -1418,7 +1387,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		goto err1;
 	}
 
-	err = nf_tables_register_hooks(net, table, chain, afi->nops);
+	err = nf_tables_register_hook(net, table, chain);
 	if (err < 0)
 		goto err1;
 
@@ -1432,7 +1401,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 	return 0;
 err2:
-	nf_tables_unregister_hooks(net, table, chain, afi->nops);
+	nf_tables_unregister_hook(net, table, chain);
 err1:
 	nf_tables_chain_destroy(chain);
 
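With the per-family hook array gone, a base chain owns exactly one hook-ops slot filled straight from the parsed hook. A trimmed-down model of that initialization; the structures below are stand-ins, not the kernel's nf_hook_ops.

#include <stdbool.h>
#include <stddef.h>

typedef int (*hookfn_t)(void *priv);

struct hook_ops {
	int	pf, hooknum, priority;
	void	*priv;
	hookfn_t hook;
	bool	nat_hook;
};

struct parsed_hook { int num, priority; hookfn_t fn; };

static void init_base_chain_ops(struct hook_ops *ops, int family,
				const struct parsed_hook *hook,
				void *chain, bool is_nat)
{
	ops->pf = family;
	ops->hooknum = hook->num;
	ops->priority = hook->priority;
	ops->priv = chain;	/* the hook callback receives its chain */
	ops->hook = hook->fn;
	ops->nat_hook = is_nat;	/* NAT chains are flagged for the core */
}

int main(void)
{
	struct hook_ops ops;
	struct parsed_hook h = { 0, -100, NULL };

	init_base_chain_ops(&ops, 2, &h, NULL, true);
	return ops.nat_hook ? 0 : 1;
}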
@@ -1445,20 +1414,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 	const struct nlattr * const *nla = ctx->nla;
 	struct nft_table *table = ctx->table;
 	struct nft_chain *chain = ctx->chain;
-	struct nft_af_info *afi = ctx->afi;
 	struct nft_base_chain *basechain;
 	struct nft_stats *stats = NULL;
 	struct nft_chain_hook hook;
 	const struct nlattr *name;
 	struct nf_hook_ops *ops;
 	struct nft_trans *trans;
-	int err, i;
+	int err;
 
 	if (nla[NFTA_CHAIN_HOOK]) {
 		if (!nft_is_base_chain(chain))
 			return -EBUSY;
 
-		err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+		err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
 					   create);
 		if (err < 0)
 			return err;
@@ -1469,14 +1437,12 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 			return -EBUSY;
 		}
 
-		for (i = 0; i < afi->nops; i++) {
-			ops = &basechain->ops[i];
-			if (ops->hooknum != hook.num ||
-			    ops->priority != hook.priority ||
-			    ops->dev != hook.dev) {
-				nft_chain_release_hook(&hook);
-				return -EBUSY;
-			}
+		ops = &basechain->ops;
+		if (ops->hooknum != hook.num ||
+		    ops->priority != hook.priority ||
+		    ops->dev != hook.dev) {
+			nft_chain_release_hook(&hook);
+			return -EBUSY;
 		}
 		nft_chain_release_hook(&hook);
 	}
@@ -1539,7 +1505,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	const struct nlattr * uninitialized_var(name);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	u8 policy = NF_ACCEPT;
@@ -1549,11 +1514,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -1593,7 +1555,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 		}
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
 	if (chain != NULL) {
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
@@ -1614,24 +1576,26 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule;
 	int family = nfmsg->nfgen_family;
 	struct nft_ctx ctx;
+	u64 handle;
 	u32 use;
 	int err;
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+	} else {
+		chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	}
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
@@ -1639,7 +1603,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 	    chain->use > 0)
 		return -EBUSY;
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
 	use = chain->use;
 	list_for_each_entry(rule, &chain->rules, list) {
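A toy model of the dual lookup the delete path now supports: by name, or by the table-unique 64-bit handle when NFTA_CHAIN_HANDLE is present. The linked list and names are illustrative.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct chain {
	const char *name;
	uint64_t handle;
	struct chain *next;
};

static struct chain *chain_lookup(struct chain *head, const char *name)
{
	for (; head; head = head->next)
		if (!strcmp(head->name, name))
			return head;
	return NULL;
}

static struct chain *chain_lookup_byhandle(struct chain *head, uint64_t h)
{
	for (; head; head = head->next)
		if (head->handle == h)
			return head;
	return NULL;
}

int main(void)
{
	struct chain c2 = { "output", 2, NULL };
	struct chain c1 = { "input", 1, &c2 };

	printf("%s\n", chain_lookup_byhandle(&c1, 2)->name);	/* output */
	printf("%s\n", chain_lookup(&c1, "input")->name);	/* input */
	return 0;
}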
@@ -1804,7 +1768,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
 	if (err < 0)
 		return err;
 
-	type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
+	type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
@@ -2027,7 +1991,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
 		goto err;
 
 	err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
-				       event, 0, ctx->afi->family, ctx->table,
+				       event, 0, ctx->family, ctx->table,
 				       ctx->chain, rule);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -2051,7 +2015,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	const struct nft_rule_dump_ctx *ctx = cb->data;
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	const struct nft_chain *chain;
 	const struct nft_rule *rule;
@@ -2062,39 +2025,37 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 	rcu_read_lock();
 	cb->seq = net->nft.base_seq;
 
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (family != NFPROTO_UNSPEC && family != afi->family)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			if (ctx && ctx->table &&
-			    strcmp(ctx->table, table->name) != 0)
-				continue;
+		if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+			continue;
 
-			list_for_each_entry_rcu(chain, &table->chains, list) {
-				if (ctx && ctx->chain &&
-				    strcmp(ctx->chain, chain->name) != 0)
-					continue;
+		list_for_each_entry_rcu(chain, &table->chains, list) {
+			if (ctx && ctx->chain &&
+			    strcmp(ctx->chain, chain->name) != 0)
+				continue;
 
-				list_for_each_entry_rcu(rule, &chain->rules, list) {
-					if (!nft_is_active(net, rule))
-						goto cont;
-					if (idx < s_idx)
-						goto cont;
-					if (idx > s_idx)
-						memset(&cb->args[1], 0,
-						       sizeof(cb->args) - sizeof(cb->args[0]));
-					if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
-								     cb->nlh->nlmsg_seq,
-								     NFT_MSG_NEWRULE,
-								     NLM_F_MULTI | NLM_F_APPEND,
-								     afi->family, table, chain, rule) < 0)
-						goto done;
-
-					nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+			list_for_each_entry_rcu(rule, &chain->rules, list) {
+				if (!nft_is_active(net, rule))
+					goto cont;
+				if (idx < s_idx)
+					goto cont;
+				if (idx > s_idx)
+					memset(&cb->args[1], 0,
+					       sizeof(cb->args) - sizeof(cb->args[0]));
+				if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+							     cb->nlh->nlmsg_seq,
+							     NFT_MSG_NEWRULE,
+							     NLM_F_MULTI | NLM_F_APPEND,
+							     table->family,
+							     table, chain, rule) < 0)
+					goto done;
+
+				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
-					idx++;
-				}
+				idx++;
 			}
 		}
 	}
@@ -2124,7 +2085,6 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	const struct nft_chain *chain;
 	const struct nft_rule *rule;
@@ -2168,11 +2128,8 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -2229,7 +2186,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
-	struct nft_af_info *afi;
+	int family = nfmsg->nfgen_family;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
@@ -2245,11 +2202,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -2288,7 +2242,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 		return PTR_ERR(old_rule);
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
 	n = 0;
 	size = 0;
@@ -2412,18 +2366,14 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain = NULL;
 	struct nft_rule *rule;
 	int family = nfmsg->nfgen_family, err = 0;
 	struct nft_ctx ctx;
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -2434,7 +2384,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 		return PTR_ERR(chain);
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
 	if (chain) {
 		if (nla[NFTA_RULE_HANDLE]) {
@@ -2601,6 +2551,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_USERDATA]		= { .type = NLA_BINARY,
 					    .len  = NFT_USERDATA_MAXLEN },
 	[NFTA_SET_OBJ_TYPE]		= { .type = NLA_U32 },
+	[NFTA_SET_HANDLE]		= { .type = NLA_U64 },
 };
 
 static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2614,26 +2565,17 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 				     u8 genmask)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	struct nft_af_info *afi = NULL;
+	int family = nfmsg->nfgen_family;
 	struct nft_table *table = NULL;
 
-	if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
-		afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
-		if (IS_ERR(afi))
-			return PTR_ERR(afi);
-	}
-
 	if (nla[NFTA_SET_TABLE] != NULL) {
-		if (afi == NULL)
-			return -EAFNOSUPPORT;
-
-		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
-					       genmask);
+		table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
+					       family, genmask);
 		if (IS_ERR(table))
 			return PTR_ERR(table);
 	}
 
-	nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
 	return 0;
 }
 
@@ -2653,6 +2595,22 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 	return ERR_PTR(-ENOENT);
 }
 
+static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
+						     const struct nlattr *nla, u8 genmask)
+{
+	struct nft_set *set;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(set, &table->sets, list) {
+		if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
+		    nft_active_genmask(set, genmask))
+			return set;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
 static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 						 const struct nlattr *nla,
 						 u8 genmask)
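The byhandle lookups depend on each table handing out monotonically increasing 64-bit handles (nf_tables_alloc_handle() in this series), so a handle never aliases a renamed or re-created object within that table. A one-line model, with illustrative names:

#include <stdint.h>

struct table { uint64_t hgenerator; };

static uint64_t alloc_handle(struct table *t)
{
	return ++t->hgenerator;	/* 0 is never handed out */
}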
@@ -2760,7 +2718,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= ctx->afi->family;
+	nfmsg->nfgen_family	= ctx->family;
 	nfmsg->version		= NFNETLINK_V0;
 	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
 
@@ -2768,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
 		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
+			 NFTA_SET_PAD))
+		goto nla_put_failure;
 	if (set->flags != 0)
 		if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
 			goto nla_put_failure;
@@ -2852,10 +2813,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const struct nft_set *set;
 	unsigned int idx, s_idx = cb->args[0];
-	struct nft_af_info *afi;
 	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
 	struct net *net = sock_net(skb->sk);
-	int cur_family = cb->args[3];
 	struct nft_ctx *ctx = cb->data, ctx_set;
 
 	if (cb->args[1])
@@ -2864,51 +2823,44 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	cb->seq = net->nft.base_seq;
 
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (ctx->afi && ctx->afi != afi)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (ctx->family != NFPROTO_UNSPEC &&
+		    ctx->family != table->family)
+			continue;
+
+		if (ctx->table && ctx->table != table)
 			continue;
 
-		if (cur_family) {
-			if (afi->family != cur_family)
+		if (cur_table) {
+			if (cur_table != table)
 				continue;
 
-			cur_family = 0;
+			cur_table = NULL;
 		}
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			if (ctx->table && ctx->table != table)
-				continue;
+		idx = 0;
+		list_for_each_entry_rcu(set, &table->sets, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (!nft_is_active(net, set))
+				goto cont;
 
-			if (cur_table) {
-				if (cur_table != table)
-					continue;
+			ctx_set = *ctx;
+			ctx_set.table = table;
+			ctx_set.family = table->family;
 
-				cur_table = NULL;
+			if (nf_tables_fill_set(skb, &ctx_set, set,
+					       NFT_MSG_NEWSET,
+					       NLM_F_MULTI) < 0) {
+				cb->args[0] = idx;
+				cb->args[2] = (unsigned long) table;
+				goto done;
 			}
-			idx = 0;
-			list_for_each_entry_rcu(set, &table->sets, list) {
-				if (idx < s_idx)
-					goto cont;
-				if (!nft_is_active(net, set))
-					goto cont;
-
-				ctx_set = *ctx;
-				ctx_set.table = table;
-				ctx_set.afi = afi;
-				if (nf_tables_fill_set(skb, &ctx_set, set,
-						       NFT_MSG_NEWSET,
-						       NLM_F_MULTI) < 0) {
-					cb->args[0] = idx;
-					cb->args[2] = (unsigned long) table;
-					cb->args[3] = afi->family;
-					goto done;
-				}
-				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
-				idx++;
-			}
-			if (s_idx)
-				s_idx = 0;
+			idx++;
 		}
+		if (s_idx)
+			s_idx = 0;
 	}
 	cb->args[1] = 1;
done:
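A userspace model of the netlink dump cursor used throughout these dump callbacks: entries below s_idx were already sent in an earlier pass, and the current position is recorded when the buffer fills so a later invocation can resume. emit() is an illustrative stand-in for nf_tables_fill_set().

#include <stdbool.h>
#include <stdio.h>

struct cursor { unsigned int s_idx; };

static bool emit(int value)
{
	static bool failed_once;

	if (value == 2 && !failed_once) {
		failed_once = true;	/* models "no room left in the skb" */
		return false;
	}
	printf("%d\n", value);
	return true;
}

static bool dump(const int *items, unsigned int n, struct cursor *cb)
{
	for (unsigned int idx = 0; idx < n; idx++) {
		if (idx < cb->s_idx)
			continue;	/* already sent in a previous pass */
		if (!emit(items[idx])) {
			cb->s_idx = idx;	/* resume from here next call */
			return false;
		}
	}
	return true;	/* dump complete */
}

int main(void)
{
	int items[] = { 0, 1, 2, 3, 4 };
	struct cursor cb = { 0 };

	while (!dump(items, 5, &cb))
		;	/* the kernel returns to userspace between passes */
	return 0;
}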
@@ -3006,8 +2958,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
+	int family = nfmsg->nfgen_family;
 	const struct nft_set_ops *ops;
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -3114,15 +3066,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
 	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set)) {
@@ -3188,6 +3137,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	set->udata	= udata;
 	set->timeout	= timeout;
 	set->gc_int	= gc_int;
+	set->handle	= nf_tables_alloc_handle(table);
 
 	err = ops->init(set, &desc, nla);
 	if (err < 0)
@@ -3245,7 +3195,10 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+	if (nla[NFTA_SET_HANDLE])
+		set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
+	else
+		set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
@@ -3277,7 +3230,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_binding *i;
 	struct nft_set_iter iter;
 
-	if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+	if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
 		return -EBUSY;
 
 	if (binding->flags & NFT_SET_MAP) {
@@ -3312,7 +3265,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 {
 	list_del_rcu(&binding->list);
 
-	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
 	    nft_is_active(ctx->net, set))
 		nf_tables_set_destroy(ctx, set);
 }
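Replacing the open-coded flag test with nft_set_is_anonymous() keeps the definition of "anonymous set" in one place. A sketch of the predicate-plus-guard shape; the flag value and names are illustrative:

#include <stdbool.h>

#define SET_ANONYMOUS	0x1

struct set { unsigned int flags; int bindings; };

static bool set_is_anonymous(const struct set *s)
{
	return s->flags & SET_ANONYMOUS;
}

/* an anonymous set may only be bound by the rule that created it */
static int bind_set(struct set *s)
{
	if (s->bindings > 0 && set_is_anonymous(s))
		return -1;	/* models -EBUSY */
	s->bindings++;
	return 0;
}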
@@ -3380,19 +3333,15 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
 				      u8 genmask)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	struct nft_af_info *afi;
+	int family = nfmsg->nfgen_family;
 	struct nft_table *table;
 
-	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
-				       genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
+				       family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
 	return 0;
 }
 
@@ -3497,7 +3446,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nft_set_dump_ctx *dump_ctx = cb->data;
 	struct net *net = sock_net(skb->sk);
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_set_dump_args args;
@@ -3509,21 +3457,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	int event;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (afi != dump_ctx->ctx.afi)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+		    dump_ctx->ctx.family != table->family)
 			continue;
 
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			if (table != dump_ctx->ctx.table)
-				continue;
+		if (table != dump_ctx->ctx.table)
+			continue;
 
-			list_for_each_entry_rcu(set, &table->sets, list) {
-				if (set == dump_ctx->set) {
-					set_found = true;
-					break;
-				}
+		list_for_each_entry_rcu(set, &table->sets, list) {
+			if (set == dump_ctx->set) {
+				set_found = true;
+				break;
 			}
-			break;
 		}
 		break;
 	}
@@ -3543,7 +3489,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = afi->family;
+	nfmsg->nfgen_family = table->family;
 	nfmsg->version      = NFNETLINK_V0;
 	nfmsg->res_id       = htons(net->nft.base_seq & 0xffff);
 
@@ -3606,7 +3552,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = ctx->afi->family;
+	nfmsg->nfgen_family = ctx->family;
 	nfmsg->version      = NFNETLINK_V0;
 	nfmsg->res_id       = htons(ctx->net->nft.base_seq & 0xffff);
 
@@ -3963,7 +3909,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	list_for_each_entry(binding, &set->bindings, list) {
 		struct nft_ctx bind_ctx = {
 			.net	= ctx->net,
-			.afi	= ctx->afi,
+			.family	= ctx->family,
 			.table	= ctx->table,
 			.chain	= (struct nft_chain *)binding->chain,
 		};
@@ -4382,6 +4328,21 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
 }
 EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
 
+struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
+						 const struct nlattr *nla,
+						 u32 objtype, u8 genmask)
+{
+	struct nft_object *obj;
+
+	list_for_each_entry(obj, &table->objects, list) {
+		if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
+		    objtype == obj->ops->type->type &&
+		    nft_active_genmask(obj, genmask))
+			return obj;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
 static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
 	[NFTA_OBJ_TABLE]	= { .type = NLA_STRING,
 				    .len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -4389,6 +4350,7 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
 				    .len = NFT_OBJ_MAXNAMELEN - 1 },
 	[NFTA_OBJ_TYPE]		= { .type = NLA_U32 },
 	[NFTA_OBJ_DATA]		= { .type = NLA_NESTED },
+	[NFTA_OBJ_HANDLE]	= { .type = NLA_U64 },
 };
 
 static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
@@ -4494,7 +4456,6 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	const struct nft_object_type *type;
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_object *obj;
 	struct nft_ctx ctx;
@@ -4506,11 +4467,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	    !nla[NFTA_OBJ_DATA])
 		return -EINVAL;
 
-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -4528,7 +4486,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 		return 0;
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
 	type = nft_obj_type_get(objtype);
 	if (IS_ERR(type))
@@ -4540,6 +4498,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 		goto err1;
 	}
 	obj->table = table;
+	obj->handle = nf_tables_alloc_handle(table);
+
 	obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
 	if (!obj->name) {
 		err = -ENOMEM;
@@ -4586,7 +4546,9 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 	    nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
 	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
 	    nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
-	    nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
+	    nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
+	    nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
+			 NFTA_OBJ_PAD))
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
@@ -4605,7 +4567,6 @@ struct nft_obj_filter {
 static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct nft_obj_filter *filter = cb->data;
@@ -4620,38 +4581,37 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	cb->seq = net->nft.base_seq;
 
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (family != NFPROTO_UNSPEC && family != afi->family)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			list_for_each_entry_rcu(obj, &table->objects, list) {
-				if (!nft_is_active(net, obj))
-					goto cont;
-				if (idx < s_idx)
-					goto cont;
-				if (idx > s_idx)
-					memset(&cb->args[1], 0,
-					       sizeof(cb->args) - sizeof(cb->args[0]));
-				if (filter && filter->table[0] &&
-				    strcmp(filter->table, table->name))
-					goto cont;
-				if (filter &&
-				    filter->type != NFT_OBJECT_UNSPEC &&
-				    obj->ops->type->type != filter->type)
-					goto cont;
+		list_for_each_entry_rcu(obj, &table->objects, list) {
+			if (!nft_is_active(net, obj))
+				goto cont;
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (filter && filter->table[0] &&
+			    strcmp(filter->table, table->name))
+				goto cont;
+			if (filter &&
+			    filter->type != NFT_OBJECT_UNSPEC &&
+			    obj->ops->type->type != filter->type)
+				goto cont;
 
-				if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
-							    cb->nlh->nlmsg_seq,
-							    NFT_MSG_NEWOBJ,
-							    NLM_F_MULTI | NLM_F_APPEND,
-							    afi->family, table, obj, reset) < 0)
-					goto done;
+			if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
+						    NFT_MSG_NEWOBJ,
+						    NLM_F_MULTI | NLM_F_APPEND,
+						    table->family, table,
+						    obj, reset) < 0)
+				goto done;
 
-				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
-				idx++;
-			}
+			idx++;
 		}
 	}
 done:
@@ -4703,7 +4663,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
 	int family = nfmsg->nfgen_family;
-	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	struct nft_object *obj;
 	struct sk_buff *skb2;
@@ -4734,11 +4693,8 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 	    !nla[NFTA_OBJ_TYPE])
 		return -EINVAL;
 
-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -4784,32 +4740,33 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
-	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_object *obj;
 	struct nft_ctx ctx;
 	u32 objtype;
 
 	if (!nla[NFTA_OBJ_TYPE] ||
-	    !nla[NFTA_OBJ_NAME])
+	    (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
 		return -EINVAL;
 
-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	if (nla[NFTA_OBJ_HANDLE])
+		obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
+						    objtype, genmask);
+	else
+		obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
+					   objtype, genmask);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 	if (obj->use > 0)
 		return -EBUSY;
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
 	return nft_delobj(&ctx, obj);
 }
@@ -4847,7 +4804,608 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
 				 struct nft_object *obj, int event)
 {
 	nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
-		       ctx->afi->family, ctx->report, GFP_KERNEL);
+		       ctx->family, ctx->report, GFP_KERNEL);
 }
 
+/*
+ * Flow tables
+ */
+void nft_register_flowtable_type(struct nf_flowtable_type *type)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail_rcu(&type->list, &nf_tables_flowtables);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
+
+void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del_rcu(&type->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
+
+static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
+	[NFTA_FLOWTABLE_TABLE]		= { .type = NLA_STRING,
+					    .len = NFT_NAME_MAXLEN - 1 },
+	[NFTA_FLOWTABLE_NAME]		= { .type = NLA_STRING,
+					    .len = NFT_NAME_MAXLEN - 1 },
+	[NFTA_FLOWTABLE_HOOK]		= { .type = NLA_NESTED },
+	[NFTA_FLOWTABLE_HANDLE]		= { .type = NLA_U64 },
+};
+
+struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+						 const struct nlattr *nla,
+						 u8 genmask)
+{
+	struct nft_flowtable *flowtable;
+
+	list_for_each_entry(flowtable, &table->flowtables, list) {
+		if (!nla_strcmp(nla, flowtable->name) &&
+		    nft_active_genmask(flowtable, genmask))
+			return flowtable;
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+
+struct nft_flowtable *
+nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
+				    const struct nlattr *nla, u8 genmask)
+{
+	struct nft_flowtable *flowtable;
+
+	list_for_each_entry(flowtable, &table->flowtables, list) {
+		if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
+		    nft_active_genmask(flowtable, genmask))
+			return flowtable;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+#define NFT_FLOWTABLE_DEVICE_MAX	8
+
+static int nf_tables_parse_devices(const struct nft_ctx *ctx,
+				   const struct nlattr *attr,
+				   struct net_device *dev_array[], int *len)
+{
+	const struct nlattr *tmp;
+	struct net_device *dev;
+	char ifname[IFNAMSIZ];
+	int rem, n = 0, err;
+
+	nla_for_each_nested(tmp, attr, rem) {
+		if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+			err = -EINVAL;
+			goto err1;
+		}
+
+		nla_strlcpy(ifname, tmp, IFNAMSIZ);
+		dev = dev_get_by_name(ctx->net, ifname);
+		if (!dev) {
+			err = -ENOENT;
+			goto err1;
+		}
+
+		dev_array[n++] = dev;
+		if (n == NFT_FLOWTABLE_DEVICE_MAX) {
+			err = -EFBIG;
+			goto err1;
+		}
+	}
+	if (!n)
+		return -EINVAL;
+
+	err = 0;
+err1:
+	*len = n;
+	return err;
+}
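A self-contained sketch of the parsing contract above: collect at most a fixed number of device references and report back how many were acquired, so the caller can release exactly that many on error. acquire()/release() stand in for dev_get_by_name()/dev_put(); everything here is illustrative userspace code.

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define DEVICE_MAX	8

struct dev { char name[16]; };

static struct dev *acquire(const char *name)	/* dev_get_by_name() stand-in */
{
	struct dev *d = malloc(sizeof(*d));

	if (d) {
		strncpy(d->name, name, sizeof(d->name) - 1);
		d->name[sizeof(d->name) - 1] = '\0';
	}
	return d;
}

static void release(struct dev *d) { free(d); }	/* dev_put() stand-in */

static int parse_devices(const char *const names[], size_t n_names,
			 struct dev *out[], int *len)
{
	int n = 0, err = 0;

	for (size_t i = 0; i < n_names; i++) {
		out[n] = acquire(names[i]);
		if (!out[n]) {
			err = -ENOENT;
			goto out;
		}
		if (++n == DEVICE_MAX) {	/* refuse oversized lists */
			err = -EFBIG;
			goto out;
		}
	}
	if (!n)
		return -EINVAL;	/* an empty device list is useless */
out:
	*len = n;	/* caller releases out[0..n-1] on error */
	return err;
}

int main(void)
{
	const char *names[] = { "eth0", "eth1" };
	struct dev *devs[DEVICE_MAX];
	int n = 0;
	int err = parse_devices(names, 2, devs, &n);

	while (n-- > 0)
		release(devs[n]);
	return err < 0;
}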
+
+static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
+	[NFTA_FLOWTABLE_HOOK_NUM]	= { .type = NLA_U32 },
+	[NFTA_FLOWTABLE_HOOK_PRIORITY]	= { .type = NLA_U32 },
+	[NFTA_FLOWTABLE_HOOK_DEVS]	= { .type = NLA_NESTED },
+};
+
+static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
+					  const struct nlattr *attr,
+					  struct nft_flowtable *flowtable)
+{
+	struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
+	struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
+	struct nf_hook_ops *ops;
+	int hooknum, priority;
+	int err, n = 0, i;
+
+	err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
+			       nft_flowtable_hook_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
+	    !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
+	    !tb[NFTA_FLOWTABLE_HOOK_DEVS])
+		return -EINVAL;
+
+	hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+	if (hooknum != NF_NETDEV_INGRESS)
+		return -EINVAL;
+
+	priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+
+	err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
+				      dev_array, &n);
+	if (err < 0)
+		goto err1;
+
+	ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
+	if (!ops) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	flowtable->hooknum	= hooknum;
+	flowtable->priority	= priority;
+	flowtable->ops		= ops;
+	flowtable->ops_len	= n;
+
+	for (i = 0; i < n; i++) {
+		flowtable->ops[i].pf		= NFPROTO_NETDEV;
+		flowtable->ops[i].hooknum	= hooknum;
+		flowtable->ops[i].priority	= priority;
+		flowtable->ops[i].priv		= &flowtable->data.rhashtable;
+		flowtable->ops[i].hook		= flowtable->data.type->hook;
+		flowtable->ops[i].dev		= dev_array[i];
+	}
+
+	err = 0;
+err1:
+	for (i = 0; i < n; i++)
+		dev_put(dev_array[i]);
+
+	return err;
+}
+
+static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+{
+	const struct nf_flowtable_type *type;
+
+	list_for_each_entry(type, &nf_tables_flowtables, list) {
+		if (family == type->family)
+			return type;
+	}
+	return NULL;
+}
+
+static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
+{
+	const struct nf_flowtable_type *type;
+
+	type = __nft_flowtable_type_get(family);
+	if (type != NULL && try_module_get(type->owner))
+		return type;
+
+#ifdef CONFIG_MODULES
+	if (type == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nf-flowtable-%u", family);
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (__nft_flowtable_type_get(family))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
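nft_flowtable_type_get() uses the usual nfnetlink autoload dance: on a miss, drop the subsystem lock, request the module, and return -EAGAIN so the whole request is replayed against the now-registered backend. A userspace model with a fake module registry; all names are illustrative.

#include <errno.h>
#include <stddef.h>

struct backend { int family; int loaded; };

static struct backend ipv4_backend = { 2, 0 };	/* pretend module registry */

static struct backend *find_backend(int family)
{
	if (family == ipv4_backend.family && ipv4_backend.loaded)
		return &ipv4_backend;
	return NULL;
}

static void load_module(int family)	/* request_module() stand-in */
{
	if (family == ipv4_backend.family)
		ipv4_backend.loaded = 1;
}

static struct backend *get_backend(int family, int *err)
{
	struct backend *b = find_backend(family);

	if (b)
		return b;

	load_module(family);
	if (find_backend(family)) {
		*err = -EAGAIN;	/* loaded now: ask the caller to replay */
		return NULL;
	}
	*err = -ENOENT;
	return NULL;
}

int main(void)
{
	int err = 0;

	if (!get_backend(2, &err) && err == -EAGAIN)
		return get_backend(2, &err) ? 0 : 1;	/* replay succeeds */
	return 1;
}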
+
+void nft_flow_table_iterate(struct net *net,
+			    void (*iter)(struct nf_flowtable *flowtable, void *data),
+			    void *data)
+{
+	struct nft_flowtable *flowtable;
+	const struct nft_table *table;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_for_each_entry(table, &net->nft.tables, list) {
+		list_for_each_entry(flowtable, &table->flowtables, list) {
+			iter(&flowtable->data, data);
+		}
+	}
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
+
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+						struct nft_flowtable *flowtable)
+{
+	int i;
+
+	for (i = 0; i < flowtable->ops_len; i++) {
+		if (!flowtable->ops[i].dev)
+			continue;
+
+		nf_unregister_net_hook(net, &flowtable->ops[i]);
+	}
+}
+
+static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
+				  struct sk_buff *skb,
+				  const struct nlmsghdr *nlh,
+				  const struct nlattr * const nla[],
+				  struct netlink_ext_ack *extack)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nf_flowtable_type *type;
+	u8 genmask = nft_genmask_next(net);
+	int family = nfmsg->nfgen_family;
+	struct nft_flowtable *flowtable;
+	struct nft_table *table;
+	struct nft_ctx ctx;
+	int err, i, k;
+
+	if (!nla[NFTA_FLOWTABLE_TABLE] ||
+	    !nla[NFTA_FLOWTABLE_NAME] ||
+	    !nla[NFTA_FLOWTABLE_HOOK])
+		return -EINVAL;
+
+	table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+				       family, genmask);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+					       genmask);
+	if (IS_ERR(flowtable)) {
+		err = PTR_ERR(flowtable);
+		if (err != -ENOENT)
+			return err;
+	} else {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+
+		return 0;
+	}
+
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+	if (!flowtable)
+		return -ENOMEM;
+
+	flowtable->table = table;
+	flowtable->handle = nf_tables_alloc_handle(table);
+
+	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+	if (!flowtable->name) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	type = nft_flowtable_type_get(family);
+	if (IS_ERR(type)) {
+		err = PTR_ERR(type);
+		goto err2;
+	}
+
+	flowtable->data.type = type;
+	err = rhashtable_init(&flowtable->data.rhashtable, type->params);
+	if (err < 0)
+		goto err3;
+
+	err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+					     flowtable);
+	if (err < 0)
+		goto err3;
+
+	for (i = 0; i < flowtable->ops_len; i++) {
+		err = nf_register_net_hook(net, &flowtable->ops[i]);
+		if (err < 0)
+			goto err4;
+	}
+
+	err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+	if (err < 0)
+		goto err5;
+
+	INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
+	queue_delayed_work(system_power_efficient_wq,
+			   &flowtable->data.gc_work, HZ);
+
+	list_add_tail_rcu(&flowtable->list, &table->flowtables);
+	table->use++;
+
+	return 0;
+err5:
+	i = flowtable->ops_len;
+err4:
+	for (k = i - 1; k >= 0; k--)
+		nf_unregister_net_hook(net, &flowtable->ops[k]);
+
+	kfree(flowtable->ops);
+err3:
+	module_put(type->owner);
+err2:
+	kfree(flowtable->name);
+err1:
+	kfree(flowtable);
+	return err;
+}
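nf_tables_newflowtable() unwinds with the classic err1..err5 goto ladder: a failure at step N jumps to the label that releases steps N-1..1 in reverse order (note the unwind loop must index with the loop variable k, not the failed step i). In miniature, with plain heap allocations:

#include <stdlib.h>

struct ft { char *name; int *hooks; };

static struct ft *ft_create(void)
{
	struct ft *f = malloc(sizeof(*f));

	if (!f)
		return NULL;

	f->name = malloc(16);
	if (!f->name)
		goto err1;

	f->hooks = calloc(8, sizeof(*f->hooks));
	if (!f->hooks)
		goto err2;

	return f;

err2:
	free(f->name);	/* undo step 2's prerequisite */
err1:
	free(f);	/* undo step 1 */
	return NULL;
}

int main(void)
{
	struct ft *f = ft_create();

	if (!f)
		return 1;
	free(f->hooks);
	free(f->name);
	free(f);
	return 0;
}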
5134
5135static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
5136 struct sk_buff *skb,
5137 const struct nlmsghdr *nlh,
5138 const struct nlattr * const nla[],
5139 struct netlink_ext_ack *extack)
5140{
5141 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
5142 u8 genmask = nft_genmask_next(net);
5143 int family = nfmsg->nfgen_family;
5144 struct nft_flowtable *flowtable;
5145 struct nft_table *table;
5146 struct nft_ctx ctx;
5147
5148 table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
5149 family, genmask);
5150 if (IS_ERR(table))
5151 return PTR_ERR(table);
5152
5153 if (nla[NFTA_FLOWTABLE_HANDLE])
5154 flowtable = nf_tables_flowtable_lookup_byhandle(table,
5155 nla[NFTA_FLOWTABLE_HANDLE],
5156 genmask);
5157 else
5158 flowtable = nf_tables_flowtable_lookup(table,
5159 nla[NFTA_FLOWTABLE_NAME],
5160 genmask);
5161 if (IS_ERR(flowtable))
5162 return PTR_ERR(flowtable);
5163 if (flowtable->use > 0)
5164 return -EBUSY;
5165
5166 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
5167
5168 return nft_delflowtable(&ctx, flowtable);
5169}
5170
5171static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
5172 u32 portid, u32 seq, int event,
5173 u32 flags, int family,
5174 struct nft_flowtable *flowtable)
5175{
5176 struct nlattr *nest, *nest_devs;
5177 struct nfgenmsg *nfmsg;
5178 struct nlmsghdr *nlh;
5179 int i;
5180
5181 event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
5182 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
5183 if (nlh == NULL)
5184 goto nla_put_failure;
5185
5186 nfmsg = nlmsg_data(nlh);
5187 nfmsg->nfgen_family = family;
5188 nfmsg->version = NFNETLINK_V0;
5189 nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
5190
5191 if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
5192 nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
5193 nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
5194 nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
5195 NFTA_FLOWTABLE_PAD))
5196 goto nla_put_failure;
5197
5198 nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
5199 if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
5200 nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
5201 goto nla_put_failure;
5202
5203 nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
5204 if (!nest_devs)
5205 goto nla_put_failure;
5206
5207 for (i = 0; i < flowtable->ops_len; i++) {
5208 if (flowtable->ops[i].dev &&
5209 nla_put_string(skb, NFTA_DEVICE_NAME,
5210 flowtable->ops[i].dev->name))
5211 goto nla_put_failure;
5212 }
5213 nla_nest_end(skb, nest_devs);
5214 nla_nest_end(skb, nest);
5215
5216 nlmsg_end(skb, nlh);
5217 return 0;
5218
5219nla_put_failure:
5220 nlmsg_trim(skb, nlh);
5221 return -1;
5222}
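
The message built by nf_tables_fill_flowtable_info() nests as follows, derived from the nla_put calls above:

NFTA_FLOWTABLE_TABLE    string
NFTA_FLOWTABLE_NAME     string
NFTA_FLOWTABLE_USE      be32
NFTA_FLOWTABLE_HANDLE   be64 (aligned via NFTA_FLOWTABLE_PAD)
NFTA_FLOWTABLE_HOOK     nest
    NFTA_FLOWTABLE_HOOK_NUM       be32
    NFTA_FLOWTABLE_HOOK_PRIORITY  be32
    NFTA_FLOWTABLE_HOOK_DEVS      nest
        NFTA_DEVICE_NAME          string, one per hooked device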
5223
5224struct nft_flowtable_filter {
5225 char *table;
5226};
5227
5228static int nf_tables_dump_flowtable(struct sk_buff *skb,
5229 struct netlink_callback *cb)
5230{
5231 const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
5232 struct nft_flowtable_filter *filter = cb->data;
5233 unsigned int idx = 0, s_idx = cb->args[0];
5234 struct net *net = sock_net(skb->sk);
5235 int family = nfmsg->nfgen_family;
5236 struct nft_flowtable *flowtable;
5237 const struct nft_table *table;
5238
5239 rcu_read_lock();
5240 cb->seq = net->nft.base_seq;
5241
5242 list_for_each_entry_rcu(table, &net->nft.tables, list) {
5243 if (family != NFPROTO_UNSPEC && family != table->family)
5244 continue;
5245
5246 list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
5247 if (!nft_is_active(net, flowtable))
5248 goto cont;
5249 if (idx < s_idx)
5250 goto cont;
5251 if (idx > s_idx)
5252 memset(&cb->args[1], 0,
5253 sizeof(cb->args) - sizeof(cb->args[0]));
5254 if (filter && filter->table &&
5255 strcmp(filter->table, table->name))
5256 goto cont;
5257
5258 if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
5259 cb->nlh->nlmsg_seq,
5260 NFT_MSG_NEWFLOWTABLE,
5261 NLM_F_MULTI | NLM_F_APPEND,
5262 table->family, flowtable) < 0)
5263 goto done;
5264
5265 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
5266cont:
5267 idx++;
5268 }
5269 }
5270done:
5271 rcu_read_unlock();
5272
5273 cb->args[0] = idx;
5274 return skb->len;
5275}
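
Dump resume follows the standard netlink pattern: cb->args[0] records how many entries have been emitted so far, and the next invocation skips everything below that cursor. A compressed model of the loop above (fill() stands in for nf_tables_fill_flowtable_info()):

	unsigned int idx = 0, s_idx = cb->args[0];

	list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
		if (idx < s_idx)
			goto cont;	/* emitted in an earlier skb */
		if (fill(skb, flowtable) < 0)
			goto done;	/* skb full: resume at idx next call */
cont:
		idx++;
	}
done:
	cb->args[0] = idx;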
5276
5277static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
5278{
5279 struct nft_flowtable_filter *filter = cb->data;
5280
5281 if (!filter)
5282 return 0;
5283
5284 kfree(filter->table);
5285 kfree(filter);
5286
5287 return 0;
5288}
5289
5290static struct nft_flowtable_filter *
5291nft_flowtable_filter_alloc(const struct nlattr * const nla[])
5292{
5293 struct nft_flowtable_filter *filter;
5294
5295 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
5296 if (!filter)
5297 return ERR_PTR(-ENOMEM);
5298
5299 if (nla[NFTA_FLOWTABLE_TABLE]) {
5300 filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
5301 GFP_KERNEL);
5302 if (!filter->table) {
5303 kfree(filter);
5304 return ERR_PTR(-ENOMEM);
5305 }
5306 }
5307 return filter;
5308}
5309
5310static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
5311 struct sk_buff *skb,
5312 const struct nlmsghdr *nlh,
5313 const struct nlattr * const nla[],
5314 struct netlink_ext_ack *extack)
5315{
5316 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
5317 u8 genmask = nft_genmask_cur(net);
5318 int family = nfmsg->nfgen_family;
5319 struct nft_flowtable *flowtable;
5320 const struct nft_table *table;
5321 struct sk_buff *skb2;
5322 int err;
5323
5324 if (nlh->nlmsg_flags & NLM_F_DUMP) {
5325 struct netlink_dump_control c = {
5326 .dump = nf_tables_dump_flowtable,
5327 .done = nf_tables_dump_flowtable_done,
5328 };
5329
5330 if (nla[NFTA_FLOWTABLE_TABLE]) {
5331 struct nft_flowtable_filter *filter;
5332
5333 filter = nft_flowtable_filter_alloc(nla);
5334 if (IS_ERR(filter))
5335 return PTR_ERR(filter);
5336
5337 c.data = filter;
5338 }
5339 return netlink_dump_start(nlsk, skb, nlh, &c);
5340 }
5341
5342 if (!nla[NFTA_FLOWTABLE_NAME])
5343 return -EINVAL;
5344
5345 table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
5346 family, genmask);
5347 if (IS_ERR(table))
5348 return PTR_ERR(table);
5349
5350 flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
5351 genmask);
5352 if (IS_ERR(flowtable))
5353 return PTR_ERR(flowtable);
5354
5355 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5356 if (!skb2)
5357 return -ENOMEM;
5358
5359 err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
5360 nlh->nlmsg_seq,
5361 NFT_MSG_NEWFLOWTABLE, 0, family,
5362 flowtable);
5363 if (err < 0)
5364 goto err;
5365
5366 return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
5367err:
5368 kfree_skb(skb2);
5369 return err;
5370}
5371
5372static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
5373 struct nft_flowtable *flowtable,
5374 int event)
5375{
5376 struct sk_buff *skb;
5377 int err;
5378
5379 if (!ctx->report &&
5380 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
5381 return;
5382
5383 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
5384 if (skb == NULL)
5385 goto err;
5386
5387 err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
5388 ctx->seq, event, 0,
5389 ctx->family, flowtable);
5390 if (err < 0) {
5391 kfree_skb(skb);
5392 goto err;
5393 }
5394
5395 nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
5396 ctx->report, GFP_KERNEL);
5397 return;
5398err:
5399 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
5400}
5401
5402static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
5403{
5404 cancel_delayed_work_sync(&flowtable->data.gc_work);
5405 kfree(flowtable->name);
5406 flowtable->data.type->free(&flowtable->data);
5407 rhashtable_destroy(&flowtable->data.rhashtable);
5408 module_put(flowtable->data.type->owner);
4851} 5409}
4852 5410
4853static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, 5411static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
@@ -4880,6 +5438,46 @@ nla_put_failure:
4880 return -EMSGSIZE; 5438 return -EMSGSIZE;
4881} 5439}
4882 5440
5441static void nft_flowtable_event(unsigned long event, struct net_device *dev,
5442 struct nft_flowtable *flowtable)
5443{
5444 int i;
5445
5446 for (i = 0; i < flowtable->ops_len; i++) {
5447 if (flowtable->ops[i].dev != dev)
5448 continue;
5449
5450 nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
5451 flowtable->ops[i].dev = NULL;
5452 break;
5453 }
5454}
5455
5456static int nf_tables_flowtable_event(struct notifier_block *this,
5457 unsigned long event, void *ptr)
5458{
5459 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5460 struct nft_flowtable *flowtable;
5461 struct nft_table *table;
5462
5463 if (event != NETDEV_UNREGISTER)
5464 return NOTIFY_DONE;
5465
5466 nfnl_lock(NFNL_SUBSYS_NFTABLES);
5467 list_for_each_entry(table, &dev_net(dev)->nft.tables, list) {
5468 list_for_each_entry(flowtable, &table->flowtables, list) {
5469 nft_flowtable_event(event, dev, flowtable);
5470 }
5471 }
5472 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
5473
5474 return NOTIFY_DONE;
5475}
5476
5477static struct notifier_block nf_tables_flowtable_notifier = {
5478 .notifier_call = nf_tables_flowtable_event,
5479};
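
A freestanding sketch of the notifier shape used above, watching for NETDEV_UNREGISTER and reacting per device (the demo_* names are illustrative, not part of this patch):

#include <linux/module.h>
#include <linux/netdevice.h>

static int demo_netdev_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	pr_debug("demo: %s is going away\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_netdev_event,
};

static int __init demo_init(void)
{
	return register_netdevice_notifier(&demo_nb);
}

static void __exit demo_exit(void)
{
	unregister_netdevice_notifier(&demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");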
5480
4883static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, 5481static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
4884 int event) 5482 int event)
4885{ 5483{
@@ -5032,6 +5630,21 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
5032 .attr_count = NFTA_OBJ_MAX, 5630 .attr_count = NFTA_OBJ_MAX,
5033 .policy = nft_obj_policy, 5631 .policy = nft_obj_policy,
5034 }, 5632 },
5633 [NFT_MSG_NEWFLOWTABLE] = {
5634 .call_batch = nf_tables_newflowtable,
5635 .attr_count = NFTA_FLOWTABLE_MAX,
5636 .policy = nft_flowtable_policy,
5637 },
5638 [NFT_MSG_GETFLOWTABLE] = {
5639 .call = nf_tables_getflowtable,
5640 .attr_count = NFTA_FLOWTABLE_MAX,
5641 .policy = nft_flowtable_policy,
5642 },
5643 [NFT_MSG_DELFLOWTABLE] = {
5644 .call_batch = nf_tables_delflowtable,
5645 .attr_count = NFTA_FLOWTABLE_MAX,
5646 .policy = nft_flowtable_policy,
5647 },
5035}; 5648};
5036 5649
5037static void nft_chain_commit_update(struct nft_trans *trans) 5650static void nft_chain_commit_update(struct nft_trans *trans)
@@ -5077,6 +5690,9 @@ static void nf_tables_commit_release(struct nft_trans *trans)
5077 case NFT_MSG_DELOBJ: 5690 case NFT_MSG_DELOBJ:
5078 nft_obj_destroy(nft_trans_obj(trans)); 5691 nft_obj_destroy(nft_trans_obj(trans));
5079 break; 5692 break;
5693 case NFT_MSG_DELFLOWTABLE:
5694 nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
5695 break;
5080 } 5696 }
5081 kfree(trans); 5697 kfree(trans);
5082} 5698}
@@ -5103,7 +5719,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
5103 if (nft_trans_table_update(trans)) { 5719 if (nft_trans_table_update(trans)) {
5104 if (!nft_trans_table_enable(trans)) { 5720 if (!nft_trans_table_enable(trans)) {
5105 nf_tables_table_disable(net, 5721 nf_tables_table_disable(net,
5106 trans->ctx.afi,
5107 trans->ctx.table); 5722 trans->ctx.table);
5108 trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; 5723 trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
5109 } 5724 }
@@ -5129,10 +5744,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
5129 case NFT_MSG_DELCHAIN: 5744 case NFT_MSG_DELCHAIN:
5130 list_del_rcu(&trans->ctx.chain->list); 5745 list_del_rcu(&trans->ctx.chain->list);
5131 nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); 5746 nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
5132 nf_tables_unregister_hooks(trans->ctx.net, 5747 nf_tables_unregister_hook(trans->ctx.net,
5133 trans->ctx.table, 5748 trans->ctx.table,
5134 trans->ctx.chain, 5749 trans->ctx.chain);
5135 trans->ctx.afi->nops);
5136 break; 5750 break;
5137 case NFT_MSG_NEWRULE: 5751 case NFT_MSG_NEWRULE:
5138 nft_clear(trans->ctx.net, nft_trans_rule(trans)); 5752 nft_clear(trans->ctx.net, nft_trans_rule(trans));
@@ -5152,7 +5766,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
5152 /* This avoids hitting -EBUSY when deleting the table 5766 /* This avoids hitting -EBUSY when deleting the table
5153 * from the transaction. 5767 * from the transaction.
5154 */ 5768 */
5155 if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS && 5769 if (nft_set_is_anonymous(nft_trans_set(trans)) &&
5156 !list_empty(&nft_trans_set(trans)->bindings)) 5770 !list_empty(&nft_trans_set(trans)->bindings))
5157 trans->ctx.table->use--; 5771 trans->ctx.table->use--;
5158 5772
@@ -5195,6 +5809,21 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
5195 nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), 5809 nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
5196 NFT_MSG_DELOBJ); 5810 NFT_MSG_DELOBJ);
5197 break; 5811 break;
5812 case NFT_MSG_NEWFLOWTABLE:
5813 nft_clear(net, nft_trans_flowtable(trans));
5814 nf_tables_flowtable_notify(&trans->ctx,
5815 nft_trans_flowtable(trans),
5816 NFT_MSG_NEWFLOWTABLE);
5817 nft_trans_destroy(trans);
5818 break;
5819 case NFT_MSG_DELFLOWTABLE:
5820 list_del_rcu(&nft_trans_flowtable(trans)->list);
5821 nf_tables_flowtable_notify(&trans->ctx,
5822 nft_trans_flowtable(trans),
5823 NFT_MSG_DELFLOWTABLE);
5824 nft_unregister_flowtable_net_hooks(net,
5825 nft_trans_flowtable(trans));
5826 break;
5198 } 5827 }
5199 } 5828 }
5200 5829
@@ -5232,6 +5861,9 @@ static void nf_tables_abort_release(struct nft_trans *trans)
5232 case NFT_MSG_NEWOBJ: 5861 case NFT_MSG_NEWOBJ:
5233 nft_obj_destroy(nft_trans_obj(trans)); 5862 nft_obj_destroy(nft_trans_obj(trans));
5234 break; 5863 break;
5864 case NFT_MSG_NEWFLOWTABLE:
5865 nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
5866 break;
5235 } 5867 }
5236 kfree(trans); 5868 kfree(trans);
5237} 5869}
@@ -5248,7 +5880,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
5248 if (nft_trans_table_update(trans)) { 5880 if (nft_trans_table_update(trans)) {
5249 if (nft_trans_table_enable(trans)) { 5881 if (nft_trans_table_enable(trans)) {
5250 nf_tables_table_disable(net, 5882 nf_tables_table_disable(net,
5251 trans->ctx.afi,
5252 trans->ctx.table); 5883 trans->ctx.table);
5253 trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; 5884 trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
5254 } 5885 }
@@ -5269,10 +5900,9 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
5269 } else { 5900 } else {
5270 trans->ctx.table->use--; 5901 trans->ctx.table->use--;
5271 list_del_rcu(&trans->ctx.chain->list); 5902 list_del_rcu(&trans->ctx.chain->list);
5272 nf_tables_unregister_hooks(trans->ctx.net, 5903 nf_tables_unregister_hook(trans->ctx.net,
5273 trans->ctx.table, 5904 trans->ctx.table,
5274 trans->ctx.chain, 5905 trans->ctx.chain);
5275 trans->ctx.afi->nops);
5276 } 5906 }
5277 break; 5907 break;
5278 case NFT_MSG_DELCHAIN: 5908 case NFT_MSG_DELCHAIN:
@@ -5322,6 +5952,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
5322 nft_clear(trans->ctx.net, nft_trans_obj(trans)); 5952 nft_clear(trans->ctx.net, nft_trans_obj(trans));
5323 nft_trans_destroy(trans); 5953 nft_trans_destroy(trans);
5324 break; 5954 break;
5955 case NFT_MSG_NEWFLOWTABLE:
5956 trans->ctx.table->use--;
5957 list_del_rcu(&nft_trans_flowtable(trans)->list);
5958 nft_unregister_flowtable_net_hooks(net,
5959 nft_trans_flowtable(trans));
5960 break;
5961 case NFT_MSG_DELFLOWTABLE:
5962 trans->ctx.table->use++;
5963 nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
5964 nft_trans_destroy(trans);
5965 break;
5325 } 5966 }
5326 } 5967 }
5327 5968
@@ -5373,7 +6014,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
5373 if (nft_is_base_chain(chain)) { 6014 if (nft_is_base_chain(chain)) {
5374 basechain = nft_base_chain(chain); 6015 basechain = nft_base_chain(chain);
5375 6016
5376 if ((1 << basechain->ops[0].hooknum) & hook_flags) 6017 if ((1 << basechain->ops.hooknum) & hook_flags)
5377 return 0; 6018 return 0;
5378 6019
5379 return -EOPNOTSUPP; 6020 return -EOPNOTSUPP;
@@ -5841,28 +6482,13 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
5841} 6482}
5842EXPORT_SYMBOL_GPL(nft_data_dump); 6483EXPORT_SYMBOL_GPL(nft_data_dump);
5843 6484
5844static int __net_init nf_tables_init_net(struct net *net)
5845{
5846 INIT_LIST_HEAD(&net->nft.af_info);
5847 INIT_LIST_HEAD(&net->nft.commit_list);
5848 net->nft.base_seq = 1;
5849 return 0;
5850}
5851
5852static void __net_exit nf_tables_exit_net(struct net *net)
5853{
5854 WARN_ON_ONCE(!list_empty(&net->nft.af_info));
5855 WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
5856}
5857
5858int __nft_release_basechain(struct nft_ctx *ctx) 6485int __nft_release_basechain(struct nft_ctx *ctx)
5859{ 6486{
5860 struct nft_rule *rule, *nr; 6487 struct nft_rule *rule, *nr;
5861 6488
5862 BUG_ON(!nft_is_base_chain(ctx->chain)); 6489 BUG_ON(!nft_is_base_chain(ctx->chain));
5863 6490
5864 nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, 6491 nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
5865 ctx->afi->nops);
5866 list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { 6492 list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
5867 list_del(&rule->list); 6493 list_del(&rule->list);
5868 ctx->chain->use--; 6494 ctx->chain->use--;
@@ -5876,9 +6502,9 @@ int __nft_release_basechain(struct nft_ctx *ctx)
5876} 6502}
5877EXPORT_SYMBOL_GPL(__nft_release_basechain); 6503EXPORT_SYMBOL_GPL(__nft_release_basechain);
5878 6504
5879/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ 6505static void __nft_release_tables(struct net *net)
5880static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5881{ 6506{
6507 struct nft_flowtable *flowtable, *nf;
5882 struct nft_table *table, *nt; 6508 struct nft_table *table, *nt;
5883 struct nft_chain *chain, *nc; 6509 struct nft_chain *chain, *nc;
5884 struct nft_object *obj, *ne; 6510 struct nft_object *obj, *ne;
@@ -5886,13 +6512,16 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5886 struct nft_set *set, *ns; 6512 struct nft_set *set, *ns;
5887 struct nft_ctx ctx = { 6513 struct nft_ctx ctx = {
5888 .net = net, 6514 .net = net,
5889 .afi = afi,
5890 }; 6515 };
5891 6516
5892 list_for_each_entry_safe(table, nt, &afi->tables, list) { 6517 list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
6518 ctx.family = table->family;
6519
5893 list_for_each_entry(chain, &table->chains, list) 6520 list_for_each_entry(chain, &table->chains, list)
5894 nf_tables_unregister_hooks(net, table, chain, 6521 nf_tables_unregister_hook(net, table, chain);
5895 afi->nops); 6522 list_for_each_entry(flowtable, &table->flowtables, list)
6523 nf_unregister_net_hooks(net, flowtable->ops,
6524 flowtable->ops_len);
5896 /* No packets are walking on these chains anymore. */ 6525 /* No packets are walking on these chains anymore. */
5897 ctx.table = table; 6526 ctx.table = table;
5898 list_for_each_entry(chain, &table->chains, list) { 6527 list_for_each_entry(chain, &table->chains, list) {
@@ -5903,6 +6532,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5903 nf_tables_rule_destroy(&ctx, rule); 6532 nf_tables_rule_destroy(&ctx, rule);
5904 } 6533 }
5905 } 6534 }
6535 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
6536 list_del(&flowtable->list);
6537 table->use--;
6538 nf_tables_flowtable_destroy(flowtable);
6539 }
5906 list_for_each_entry_safe(set, ns, &table->sets, list) { 6540 list_for_each_entry_safe(set, ns, &table->sets, list) {
5907 list_del(&set->list); 6541 list_del(&set->list);
5908 table->use--; 6542 table->use--;
@@ -5923,6 +6557,21 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5923 } 6557 }
5924} 6558}
5925 6559
6560static int __net_init nf_tables_init_net(struct net *net)
6561{
6562 INIT_LIST_HEAD(&net->nft.tables);
6563 INIT_LIST_HEAD(&net->nft.commit_list);
6564 net->nft.base_seq = 1;
6565 return 0;
6566}
6567
6568static void __net_exit nf_tables_exit_net(struct net *net)
6569{
6570 __nft_release_tables(net);
6571 WARN_ON_ONCE(!list_empty(&net->nft.tables));
6572 WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
6573}
6574
5926static struct pernet_operations nf_tables_net_ops = { 6575static struct pernet_operations nf_tables_net_ops = {
5927 .init = nf_tables_init_net, 6576 .init = nf_tables_init_net,
5928 .exit = nf_tables_exit_net, 6577 .exit = nf_tables_exit_net,
@@ -5947,7 +6596,8 @@ static int __init nf_tables_module_init(void)
5947 if (err < 0) 6596 if (err < 0)
5948 goto err3; 6597 goto err3;
5949 6598
5950 pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n"); 6599 register_netdevice_notifier(&nf_tables_flowtable_notifier);
6600
5951 return register_pernet_subsys(&nf_tables_net_ops); 6601 return register_pernet_subsys(&nf_tables_net_ops);
5952err3: 6602err3:
5953 nf_tables_core_module_exit(); 6603 nf_tables_core_module_exit();
@@ -5961,6 +6611,7 @@ static void __exit nf_tables_module_exit(void)
5961{ 6611{
5962 unregister_pernet_subsys(&nf_tables_net_ops); 6612 unregister_pernet_subsys(&nf_tables_net_ops);
5963 nfnetlink_subsys_unregister(&nf_tables_subsys); 6613 nfnetlink_subsys_unregister(&nf_tables_subsys);
6614 unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
5964 rcu_barrier(); 6615 rcu_barrier();
5965 nf_tables_core_module_exit(); 6616 nf_tables_core_module_exit();
5966 kfree(info); 6617 kfree(info);
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index f713cc205669..e30c7da09d0d 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -9,6 +9,7 @@
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/ip.h> 11#include <linux/ip.h>
12#include <linux/ipv6.h>
12#include <linux/netfilter_ipv4.h> 13#include <linux/netfilter_ipv4.h>
13#include <linux/netfilter_ipv6.h> 14#include <linux/netfilter_ipv6.h>
14#include <net/netfilter/nf_tables.h> 15#include <net/netfilter/nf_tables.h>
@@ -16,56 +17,27 @@
16#include <net/netfilter/nf_tables_ipv6.h> 17#include <net/netfilter/nf_tables_ipv6.h>
17#include <net/ip.h> 18#include <net/ip.h>
18 19
19static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n) 20static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
21 const struct nf_hook_state *state)
20{ 22{
21 struct nft_af_info *afi; 23 struct nft_pktinfo pkt;
22 24
23 if (n == 1) 25 nft_set_pktinfo(&pkt, skb, state);
24 afi = &nft_af_ipv4; 26
25 else 27 switch (state->pf) {
26 afi = &nft_af_ipv6; 28 case NFPROTO_IPV4:
27 29 nft_set_pktinfo_ipv4(&pkt, skb);
28 ops->pf = afi->family; 30 break;
29 if (afi->hooks[ops->hooknum]) 31 case NFPROTO_IPV6:
30 ops->hook = afi->hooks[ops->hooknum]; 32 nft_set_pktinfo_ipv6(&pkt, skb);
31} 33 break;
32 34 default:
33static struct nft_af_info nft_af_inet __read_mostly = { 35 break;
34 .family = NFPROTO_INET, 36 }
35 .nhooks = NF_INET_NUMHOOKS, 37
36 .owner = THIS_MODULE, 38 return nft_do_chain(&pkt, priv);
37 .nops = 2,
38 .hook_ops_init = nft_inet_hook_ops_init,
39};
40
41static int __net_init nf_tables_inet_init_net(struct net *net)
42{
43 net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
44 if (net->nft.inet == NULL)
45 return -ENOMEM;
46 memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
47
48 if (nft_register_afinfo(net, net->nft.inet) < 0)
49 goto err;
50
51 return 0;
52
53err:
54 kfree(net->nft.inet);
55 return -ENOMEM;
56}
57
58static void __net_exit nf_tables_inet_exit_net(struct net *net)
59{
60 nft_unregister_afinfo(net, net->nft.inet);
61 kfree(net->nft.inet);
62} 39}
63 40
64static struct pernet_operations nf_tables_inet_net_ops = {
65 .init = nf_tables_inet_init_net,
66 .exit = nf_tables_inet_exit_net,
67};
68
69static const struct nf_chain_type filter_inet = { 41static const struct nf_chain_type filter_inet = {
70 .name = "filter", 42 .name = "filter",
71 .type = NFT_CHAIN_T_DEFAULT, 43 .type = NFT_CHAIN_T_DEFAULT,
@@ -76,26 +48,22 @@ static const struct nf_chain_type filter_inet = {
76 (1 << NF_INET_FORWARD) | 48 (1 << NF_INET_FORWARD) |
77 (1 << NF_INET_PRE_ROUTING) | 49 (1 << NF_INET_PRE_ROUTING) |
78 (1 << NF_INET_POST_ROUTING), 50 (1 << NF_INET_POST_ROUTING),
51 .hooks = {
52 [NF_INET_LOCAL_IN] = nft_do_chain_inet,
53 [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
54 [NF_INET_FORWARD] = nft_do_chain_inet,
55 [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
56 [NF_INET_POST_ROUTING] = nft_do_chain_inet,
57 },
79}; 58};
80 59
81static int __init nf_tables_inet_init(void) 60static int __init nf_tables_inet_init(void)
82{ 61{
83 int ret; 62 return nft_register_chain_type(&filter_inet);
84
85 ret = nft_register_chain_type(&filter_inet);
86 if (ret < 0)
87 return ret;
88
89 ret = register_pernet_subsys(&nf_tables_inet_net_ops);
90 if (ret < 0)
91 nft_unregister_chain_type(&filter_inet);
92
93 return ret;
94} 63}
95 64
96static void __exit nf_tables_inet_exit(void) 65static void __exit nf_tables_inet_exit(void)
97{ 66{
98 unregister_pernet_subsys(&nf_tables_inet_net_ops);
99 nft_unregister_chain_type(&filter_inet); 67 nft_unregister_chain_type(&filter_inet);
100} 68}
101 69
@@ -104,4 +72,4 @@ module_exit(nf_tables_inet_exit);
104 72
105MODULE_LICENSE("GPL"); 73MODULE_LICENSE("GPL");
106MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 74MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
107MODULE_ALIAS_NFT_FAMILY(1); 75MODULE_ALIAS_NFT_CHAIN(1, "filter");
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 403432988313..4041fafca934 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -21,66 +21,32 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
21{ 21{
22 struct nft_pktinfo pkt; 22 struct nft_pktinfo pkt;
23 23
24 nft_set_pktinfo(&pkt, skb, state);
25
24 switch (skb->protocol) { 26 switch (skb->protocol) {
25 case htons(ETH_P_IP): 27 case htons(ETH_P_IP):
26 nft_set_pktinfo_ipv4_validate(&pkt, skb, state); 28 nft_set_pktinfo_ipv4_validate(&pkt, skb);
27 break; 29 break;
28 case htons(ETH_P_IPV6): 30 case htons(ETH_P_IPV6):
29 nft_set_pktinfo_ipv6_validate(&pkt, skb, state); 31 nft_set_pktinfo_ipv6_validate(&pkt, skb);
30 break; 32 break;
31 default: 33 default:
32 nft_set_pktinfo_unspec(&pkt, skb, state); 34 nft_set_pktinfo_unspec(&pkt, skb);
33 break; 35 break;
34 } 36 }
35 37
36 return nft_do_chain(&pkt, priv); 38 return nft_do_chain(&pkt, priv);
37} 39}
38 40
39static struct nft_af_info nft_af_netdev __read_mostly = {
40 .family = NFPROTO_NETDEV,
41 .nhooks = NF_NETDEV_NUMHOOKS,
42 .owner = THIS_MODULE,
43 .flags = NFT_AF_NEEDS_DEV,
44 .nops = 1,
45 .hooks = {
46 [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
47 },
48};
49
50static int nf_tables_netdev_init_net(struct net *net)
51{
52 net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
53 if (net->nft.netdev == NULL)
54 return -ENOMEM;
55
56 memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
57
58 if (nft_register_afinfo(net, net->nft.netdev) < 0)
59 goto err;
60
61 return 0;
62err:
63 kfree(net->nft.netdev);
64 return -ENOMEM;
65}
66
67static void nf_tables_netdev_exit_net(struct net *net)
68{
69 nft_unregister_afinfo(net, net->nft.netdev);
70 kfree(net->nft.netdev);
71}
72
73static struct pernet_operations nf_tables_netdev_net_ops = {
74 .init = nf_tables_netdev_init_net,
75 .exit = nf_tables_netdev_exit_net,
76};
77
78static const struct nf_chain_type nft_filter_chain_netdev = { 41static const struct nf_chain_type nft_filter_chain_netdev = {
79 .name = "filter", 42 .name = "filter",
80 .type = NFT_CHAIN_T_DEFAULT, 43 .type = NFT_CHAIN_T_DEFAULT,
81 .family = NFPROTO_NETDEV, 44 .family = NFPROTO_NETDEV,
82 .owner = THIS_MODULE, 45 .owner = THIS_MODULE,
83 .hook_mask = (1 << NF_NETDEV_INGRESS), 46 .hook_mask = (1 << NF_NETDEV_INGRESS),
47 .hooks = {
48 [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
49 },
84}; 50};
85 51
86static void nft_netdev_event(unsigned long event, struct net_device *dev, 52static void nft_netdev_event(unsigned long event, struct net_device *dev,
@@ -96,7 +62,7 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
96 __nft_release_basechain(ctx); 62 __nft_release_basechain(ctx);
97 break; 63 break;
98 case NETDEV_CHANGENAME: 64 case NETDEV_CHANGENAME:
99 if (dev->ifindex != basechain->ops[0].dev->ifindex) 65 if (dev->ifindex != basechain->ops.dev->ifindex)
100 return; 66 return;
101 67
102 strncpy(basechain->dev_name, dev->name, IFNAMSIZ); 68 strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
@@ -108,7 +74,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
108 unsigned long event, void *ptr) 74 unsigned long event, void *ptr)
109{ 75{
110 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 76 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
111 struct nft_af_info *afi;
112 struct nft_table *table; 77 struct nft_table *table;
113 struct nft_chain *chain, *nr; 78 struct nft_chain *chain, *nr;
114 struct nft_ctx ctx = { 79 struct nft_ctx ctx = {
@@ -120,20 +85,18 @@ static int nf_tables_netdev_event(struct notifier_block *this,
120 return NOTIFY_DONE; 85 return NOTIFY_DONE;
121 86
122 nfnl_lock(NFNL_SUBSYS_NFTABLES); 87 nfnl_lock(NFNL_SUBSYS_NFTABLES);
123 list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { 88 list_for_each_entry(table, &ctx.net->nft.tables, list) {
124 ctx.afi = afi; 89 if (table->family != NFPROTO_NETDEV)
125 if (afi->family != NFPROTO_NETDEV)
126 continue; 90 continue;
127 91
128 list_for_each_entry(table, &afi->tables, list) { 92 ctx.family = table->family;
129 ctx.table = table; 93 ctx.table = table;
130 list_for_each_entry_safe(chain, nr, &table->chains, list) { 94 list_for_each_entry_safe(chain, nr, &table->chains, list) {
131 if (!nft_is_base_chain(chain)) 95 if (!nft_is_base_chain(chain))
132 continue; 96 continue;
133 97
134 ctx.chain = chain; 98 ctx.chain = chain;
135 nft_netdev_event(event, dev, &ctx); 99 nft_netdev_event(event, dev, &ctx);
136 }
137 } 100 }
138 } 101 }
139 nfnl_unlock(NFNL_SUBSYS_NFTABLES); 102 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
@@ -153,27 +116,21 @@ static int __init nf_tables_netdev_init(void)
153 if (ret) 116 if (ret)
154 return ret; 117 return ret;
155 118
156 ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
157 if (ret)
158 goto err1;
159
160 ret = register_netdevice_notifier(&nf_tables_netdev_notifier); 119 ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
161 if (ret) 120 if (ret)
162 goto err2; 121 goto err_register_netdevice_notifier;
163 122
164 return 0; 123 return 0;
165 124
166err2: 125err_register_netdevice_notifier:
167 unregister_pernet_subsys(&nf_tables_netdev_net_ops);
168err1:
169 nft_unregister_chain_type(&nft_filter_chain_netdev); 126 nft_unregister_chain_type(&nft_filter_chain_netdev);
127
170 return ret; 128 return ret;
171} 129}
172 130
173static void __exit nf_tables_netdev_exit(void) 131static void __exit nf_tables_netdev_exit(void)
174{ 132{
175 unregister_netdevice_notifier(&nf_tables_netdev_notifier); 133 unregister_netdevice_notifier(&nf_tables_netdev_notifier);
176 unregister_pernet_subsys(&nf_tables_netdev_net_ops);
177 nft_unregister_chain_type(&nft_filter_chain_netdev); 134 nft_unregister_chain_type(&nft_filter_chain_netdev);
178} 135}
179 136
@@ -182,4 +139,4 @@ module_exit(nf_tables_netdev_exit);
182 139
183MODULE_LICENSE("GPL"); 140MODULE_LICENSE("GPL");
184MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 141MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
185MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */ 142MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 733d3e4a30d8..03ead8a9e90c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -37,8 +37,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
37 rcu_dereference_protected(table[(id)].subsys, \ 37 rcu_dereference_protected(table[(id)].subsys, \
38 lockdep_nfnl_is_held((id))) 38 lockdep_nfnl_is_held((id)))
39 39
40static char __initdata nfversion[] = "0.30";
41
42static struct { 40static struct {
43 struct mutex mutex; 41 struct mutex mutex;
44 const struct nfnetlink_subsystem __rcu *subsys; 42 const struct nfnetlink_subsystem __rcu *subsys;
@@ -580,13 +578,11 @@ static int __init nfnetlink_init(void)
580 for (i=0; i<NFNL_SUBSYS_COUNT; i++) 578 for (i=0; i<NFNL_SUBSYS_COUNT; i++)
581 mutex_init(&table[i].mutex); 579 mutex_init(&table[i].mutex);
582 580
583 pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
584 return register_pernet_subsys(&nfnetlink_net_ops); 581 return register_pernet_subsys(&nfnetlink_net_ops);
585} 582}
586 583
587static void __exit nfnetlink_exit(void) 584static void __exit nfnetlink_exit(void)
588{ 585{
589 pr_info("Removing netfilter NETLINK layer.\n");
590 unregister_pernet_subsys(&nfnetlink_net_ops); 586 unregister_pernet_subsys(&nfnetlink_net_ops);
591} 587}
592module_init(nfnetlink_init); 588module_init(nfnetlink_init);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c45e6d4358ab..88d427f9f9e6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -527,7 +527,6 @@ static int __init nfnl_acct_init(void)
527 goto err_out; 527 goto err_out;
528 } 528 }
529 529
530 pr_info("nfnl_acct: registering with nfnetlink.\n");
531 ret = nfnetlink_subsys_register(&nfnl_acct_subsys); 530 ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
532 if (ret < 0) { 531 if (ret < 0) {
533 pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); 532 pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
@@ -543,7 +542,6 @@ err_out:
543 542
544static void __exit nfnl_acct_exit(void) 543static void __exit nfnl_acct_exit(void)
545{ 544{
546 pr_info("nfnl_acct: unregistering from nfnetlink.\n");
547 nfnetlink_subsys_unregister(&nfnl_acct_subsys); 545 nfnetlink_subsys_unregister(&nfnl_acct_subsys);
548 unregister_pernet_subsys(&nfnl_acct_ops); 546 unregister_pernet_subsys(&nfnl_acct_ops);
549} 547}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 32b1c0b44e79..95b04702a655 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -615,8 +615,6 @@ err_out:
615 615
616static void __exit cttimeout_exit(void) 616static void __exit cttimeout_exit(void)
617{ 617{
618 pr_info("cttimeout: unregistering from nfnetlink.\n");
619
620 nfnetlink_subsys_unregister(&cttimeout_subsys); 618 nfnetlink_subsys_unregister(&cttimeout_subsys);
621 619
622 unregister_pernet_subsys(&cttimeout_ops); 620 unregister_pernet_subsys(&cttimeout_ops);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e955bec0acc6..7b46aa4c478d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1054,7 +1054,6 @@ static int nful_open(struct inode *inode, struct file *file)
1054} 1054}
1055 1055
1056static const struct file_operations nful_file_ops = { 1056static const struct file_operations nful_file_ops = {
1057 .owner = THIS_MODULE,
1058 .open = nful_open, 1057 .open = nful_open,
1059 .read = seq_read, 1058 .read = seq_read,
1060 .llseek = seq_lseek, 1059 .llseek = seq_lseek,
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c09b36755ed7..8bba23160a68 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_notifier = {
941 .notifier_call = nfqnl_rcv_dev_event, 941 .notifier_call = nfqnl_rcv_dev_event,
942}; 942};
943 943
944static unsigned int nfqnl_nf_hook_drop(struct net *net) 944static void nfqnl_nf_hook_drop(struct net *net)
945{ 945{
946 struct nfnl_queue_net *q = nfnl_queue_pernet(net); 946 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
947 unsigned int instances = 0;
948 int i; 947 int i;
949 948
950 for (i = 0; i < INSTANCE_BUCKETS; i++) { 949 for (i = 0; i < INSTANCE_BUCKETS; i++) {
951 struct nfqnl_instance *inst; 950 struct nfqnl_instance *inst;
952 struct hlist_head *head = &q->instance_table[i]; 951 struct hlist_head *head = &q->instance_table[i];
953 952
954 hlist_for_each_entry_rcu(inst, head, hlist) { 953 hlist_for_each_entry_rcu(inst, head, hlist)
955 nfqnl_flush(inst, NULL, 0); 954 nfqnl_flush(inst, NULL, 0);
956 instances++;
957 }
958 } 955 }
959
960 return instances;
961} 956}
962 957
963static int 958static int
@@ -1482,7 +1477,6 @@ static int nfqnl_open(struct inode *inode, struct file *file)
1482} 1477}
1483 1478
1484static const struct file_operations nfqnl_file_ops = { 1479static const struct file_operations nfqnl_file_ops = {
1485 .owner = THIS_MODULE,
1486 .open = nfqnl_open, 1480 .open = nfqnl_open,
1487 .read = seq_read, 1481 .read = seq_read,
1488 .llseek = seq_lseek, 1482 .llseek = seq_lseek,
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index c2945eb3397c..fa90a8402845 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -44,6 +44,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
44 case NFT_CMP_LT: 44 case NFT_CMP_LT:
45 if (d == 0) 45 if (d == 0)
46 goto mismatch; 46 goto mismatch;
47 /* fall through */
47 case NFT_CMP_LTE: 48 case NFT_CMP_LTE:
48 if (d > 0) 49 if (d > 0)
49 goto mismatch; 50 goto mismatch;
@@ -51,6 +52,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
51 case NFT_CMP_GT: 52 case NFT_CMP_GT:
52 if (d == 0) 53 if (d == 0)
53 goto mismatch; 54 goto mismatch;
55 /* fall through */
54 case NFT_CMP_GTE: 56 case NFT_CMP_GTE:
55 if (d < 0) 57 if (d < 0)
56 goto mismatch; 58 goto mismatch;
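
The added fall-through comments document intentional behaviour: LT is LTE with equality filtered out first, and GT relates to GTE the same way, so each strict case only rejects d == 0 and then reuses the non-strict test. An equivalent user-space model (a sketch):

#include <stdbool.h>

enum cmp_op { CMP_LT, CMP_LTE, CMP_GT, CMP_GTE };

static bool cmp_match(enum cmp_op op, int d)	/* d: memcmp()-style result */
{
	switch (op) {
	case CMP_LT:
		if (d == 0)
			return false;
		/* fall through */
	case CMP_LTE:
		return d <= 0;
	case CMP_GT:
		if (d == 0)
			return false;
		/* fall through */
	case CMP_GTE:
		return d >= 0;
	}
	return false;
}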
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index b89f4f65b2a0..8e23726b9081 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
144{ 144{
145 par->net = ctx->net; 145 par->net = ctx->net;
146 par->table = ctx->table->name; 146 par->table = ctx->table->name;
147 switch (ctx->afi->family) { 147 switch (ctx->family) {
148 case AF_INET: 148 case AF_INET:
149 entry->e4.ip.proto = proto; 149 entry->e4.ip.proto = proto;
150 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; 150 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
@@ -169,13 +169,13 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
169 if (nft_is_base_chain(ctx->chain)) { 169 if (nft_is_base_chain(ctx->chain)) {
170 const struct nft_base_chain *basechain = 170 const struct nft_base_chain *basechain =
171 nft_base_chain(ctx->chain); 171 nft_base_chain(ctx->chain);
172 const struct nf_hook_ops *ops = &basechain->ops[0]; 172 const struct nf_hook_ops *ops = &basechain->ops;
173 173
174 par->hook_mask = 1 << ops->hooknum; 174 par->hook_mask = 1 << ops->hooknum;
175 } else { 175 } else {
176 par->hook_mask = 0; 176 par->hook_mask = 0;
177 } 177 }
178 par->family = ctx->afi->family; 178 par->family = ctx->family;
179 par->nft_compat = true; 179 par->nft_compat = true;
180} 180}
181 181
@@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
267 par.net = ctx->net; 267 par.net = ctx->net;
268 par.target = target; 268 par.target = target;
269 par.targinfo = info; 269 par.targinfo = info;
270 par.family = ctx->afi->family; 270 par.family = ctx->family;
271 if (par.target->destroy != NULL) 271 if (par.target->destroy != NULL)
272 par.target->destroy(&par); 272 par.target->destroy(&par);
273 273
@@ -302,7 +302,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
302 if (nft_is_base_chain(ctx->chain)) { 302 if (nft_is_base_chain(ctx->chain)) {
303 const struct nft_base_chain *basechain = 303 const struct nft_base_chain *basechain =
304 nft_base_chain(ctx->chain); 304 nft_base_chain(ctx->chain);
305 const struct nf_hook_ops *ops = &basechain->ops[0]; 305 const struct nf_hook_ops *ops = &basechain->ops;
306 306
307 hook_mask = 1 << ops->hooknum; 307 hook_mask = 1 << ops->hooknum;
308 if (target->hooks && !(hook_mask & target->hooks)) 308 if (target->hooks && !(hook_mask & target->hooks))
@@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
358{ 358{
359 par->net = ctx->net; 359 par->net = ctx->net;
360 par->table = ctx->table->name; 360 par->table = ctx->table->name;
361 switch (ctx->afi->family) { 361 switch (ctx->family) {
362 case AF_INET: 362 case AF_INET:
363 entry->e4.ip.proto = proto; 363 entry->e4.ip.proto = proto;
364 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; 364 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
@@ -383,13 +383,13 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
383 if (nft_is_base_chain(ctx->chain)) { 383 if (nft_is_base_chain(ctx->chain)) {
384 const struct nft_base_chain *basechain = 384 const struct nft_base_chain *basechain =
385 nft_base_chain(ctx->chain); 385 nft_base_chain(ctx->chain);
386 const struct nf_hook_ops *ops = &basechain->ops[0]; 386 const struct nf_hook_ops *ops = &basechain->ops;
387 387
388 par->hook_mask = 1 << ops->hooknum; 388 par->hook_mask = 1 << ops->hooknum;
389 } else { 389 } else {
390 par->hook_mask = 0; 390 par->hook_mask = 0;
391 } 391 }
392 par->family = ctx->afi->family; 392 par->family = ctx->family;
393 par->nft_compat = true; 393 par->nft_compat = true;
394} 394}
395 395
@@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
446 par.net = ctx->net; 446 par.net = ctx->net;
447 par.match = match; 447 par.match = match;
448 par.matchinfo = info; 448 par.matchinfo = info;
449 par.family = ctx->afi->family; 449 par.family = ctx->family;
450 if (par.match->destroy != NULL) 450 if (par.match->destroy != NULL)
451 par.match->destroy(&par); 451 par.match->destroy(&par);
452 452
@@ -481,7 +481,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
481 if (nft_is_base_chain(ctx->chain)) { 481 if (nft_is_base_chain(ctx->chain)) {
482 const struct nft_base_chain *basechain = 482 const struct nft_base_chain *basechain =
483 nft_base_chain(ctx->chain); 483 nft_base_chain(ctx->chain);
484 const struct nf_hook_ops *ops = &basechain->ops[0]; 484 const struct nf_hook_ops *ops = &basechain->ops;
485 485
486 hook_mask = 1 << ops->hooknum; 486 hook_mask = 1 << ops->hooknum;
487 if (match->hooks && !(hook_mask & match->hooks)) 487 if (match->hooks && !(hook_mask & match->hooks))
@@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
648 648
649 mt_name = nla_data(tb[NFTA_MATCH_NAME]); 649 mt_name = nla_data(tb[NFTA_MATCH_NAME]);
650 rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); 650 rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
651 family = ctx->afi->family; 651 family = ctx->family;
652 652
653 /* Re-use the existing match if it's already loaded. */ 653 /* Re-use the existing match if it's already loaded. */
654 list_for_each_entry(nft_match, &nft_match_list, head) { 654 list_for_each_entry(nft_match, &nft_match_list, head) {
@@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
733 733
734 tg_name = nla_data(tb[NFTA_TARGET_NAME]); 734 tg_name = nla_data(tb[NFTA_TARGET_NAME]);
735 rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); 735 rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
736 family = ctx->afi->family; 736 family = ctx->family;
737 737
738 /* Re-use the existing target if it's already loaded. */ 738 /* Re-use the existing target if it's already loaded. */
739 list_for_each_entry(nft_target, &nft_target_list, head) { 739 list_for_each_entry(nft_target, &nft_target_list, head) {
@@ -812,8 +812,6 @@ static int __init nft_compat_module_init(void)
812 goto err_target; 812 goto err_target;
813 } 813 }
814 814
815 pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
816
817 return ret; 815 return ret;
818 816
819err_target: 817err_target:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 2647b895f4b0..6ab274b14484 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
405 if (tb[NFTA_CT_DIRECTION] == NULL) 405 if (tb[NFTA_CT_DIRECTION] == NULL)
406 return -EINVAL; 406 return -EINVAL;
407 407
408 switch (ctx->afi->family) { 408 switch (ctx->family) {
409 case NFPROTO_IPV4: 409 case NFPROTO_IPV4:
410 len = FIELD_SIZEOF(struct nf_conntrack_tuple, 410 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
411 src.u3.ip); 411 src.u3.ip);
@@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
456 if (err < 0) 456 if (err < 0)
457 return err; 457 return err;
458 458
459 err = nf_ct_netns_get(ctx->net, ctx->afi->family); 459 err = nf_ct_netns_get(ctx->net, ctx->family);
460 if (err < 0) 460 if (err < 0)
461 return err; 461 return err;
462 462
@@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
550 if (err < 0) 550 if (err < 0)
551 goto err1; 551 goto err1;
552 552
553 err = nf_ct_netns_get(ctx->net, ctx->afi->family); 553 err = nf_ct_netns_get(ctx->net, ctx->family);
554 if (err < 0) 554 if (err < 0)
555 goto err1; 555 goto err1;
556 556
@@ -564,7 +564,7 @@ err1:
564static void nft_ct_get_destroy(const struct nft_ctx *ctx, 564static void nft_ct_get_destroy(const struct nft_ctx *ctx,
565 const struct nft_expr *expr) 565 const struct nft_expr *expr)
566{ 566{
567 nf_ct_netns_put(ctx->net, ctx->afi->family); 567 nf_ct_netns_put(ctx->net, ctx->family);
568} 568}
569 569
570static void nft_ct_set_destroy(const struct nft_ctx *ctx, 570static void nft_ct_set_destroy(const struct nft_ctx *ctx,
@@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
573 struct nft_ct *priv = nft_expr_priv(expr); 573 struct nft_ct *priv = nft_expr_priv(expr);
574 574
575 __nft_ct_set_destroy(ctx, priv); 575 __nft_ct_set_destroy(ctx, priv);
576 nf_ct_netns_put(ctx->net, ctx->afi->family); 576 nf_ct_netns_put(ctx->net, ctx->family);
577} 577}
578 578
579static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) 579static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
734 struct nft_ct_helper_obj *priv = nft_obj_data(obj); 734 struct nft_ct_helper_obj *priv = nft_obj_data(obj);
735 struct nf_conntrack_helper *help4, *help6; 735 struct nf_conntrack_helper *help4, *help6;
736 char name[NF_CT_HELPER_NAME_LEN]; 736 char name[NF_CT_HELPER_NAME_LEN];
737 int family = ctx->afi->family; 737 int family = ctx->family;
738 738
739 if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) 739 if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
740 return -EINVAL; 740 return -EINVAL;
@@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
753 753
754 switch (family) { 754 switch (family) {
755 case NFPROTO_IPV4: 755 case NFPROTO_IPV4:
756 if (ctx->afi->family == NFPROTO_IPV6) 756 if (ctx->family == NFPROTO_IPV6)
757 return -EINVAL; 757 return -EINVAL;
758 758
759 help4 = nf_conntrack_helper_try_module_get(name, family, 759 help4 = nf_conntrack_helper_try_module_get(name, family,
760 priv->l4proto); 760 priv->l4proto);
761 break; 761 break;
762 case NFPROTO_IPV6: 762 case NFPROTO_IPV6:
763 if (ctx->afi->family == NFPROTO_IPV4) 763 if (ctx->family == NFPROTO_IPV4)
764 return -EINVAL; 764 return -EINVAL;
765 765
766 help6 = nf_conntrack_helper_try_module_get(name, family, 766 help6 = nf_conntrack_helper_try_module_get(name, family,
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 66221ad891a9..fc83e29d6634 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -164,7 +164,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
164 } 164 }
165 165
166 priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); 166 priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
167 err = nft_validate_register_load(priv->sreg_key, set->klen);; 167 err = nft_validate_register_load(priv->sreg_key, set->klen);
168 if (err < 0) 168 if (err < 0)
169 return err; 169 return err;
170 170
@@ -184,7 +184,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
184 if (tb[NFTA_DYNSET_EXPR] != NULL) { 184 if (tb[NFTA_DYNSET_EXPR] != NULL) {
185 if (!(set->flags & NFT_SET_EVAL)) 185 if (!(set->flags & NFT_SET_EVAL))
186 return -EINVAL; 186 return -EINVAL;
187 if (!(set->flags & NFT_SET_ANONYMOUS)) 187 if (!nft_set_is_anonymous(set))
188 return -EOPNOTSUPP; 188 return -EOPNOTSUPP;
189 189
190 priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); 190 priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
new file mode 100644
index 000000000000..b65829b2be22
--- /dev/null
+++ b/net/netfilter/nft_flow_offload.c
@@ -0,0 +1,242 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/init.h>
4#include <linux/netlink.h>
5#include <linux/netfilter.h>
6#include <linux/workqueue.h>
7#include <linux/spinlock.h>
8#include <linux/netfilter/nf_tables.h>
9#include <net/ip.h> /* for ipv4 options. */
10#include <net/netfilter/nf_tables.h>
11#include <net/netfilter/nf_tables_core.h>
12#include <net/netfilter/nf_conntrack_core.h>
13#include <linux/netfilter/nf_conntrack_common.h>
14#include <net/netfilter/nf_flow_table.h>
15
16struct nft_flow_offload {
17 struct nft_flowtable *flowtable;
18};
19
20static int nft_flow_route(const struct nft_pktinfo *pkt,
21 const struct nf_conn *ct,
22 struct nf_flow_route *route,
23 enum ip_conntrack_dir dir)
24{
25 struct dst_entry *this_dst = skb_dst(pkt->skb);
26 struct dst_entry *other_dst = NULL;
27 struct flowi fl;
28
29 memset(&fl, 0, sizeof(fl));
30 switch (nft_pf(pkt)) {
31 case NFPROTO_IPV4:
32 fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
33 break;
34 case NFPROTO_IPV6:
35 fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
36 break;
37 }
38
39 nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
40 if (!other_dst)
41 return -ENOENT;
42
43 route->tuple[dir].dst = this_dst;
44 route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
45 route->tuple[!dir].dst = other_dst;
46 route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
47
48 return 0;
49}
50
51static bool nft_flow_offload_skip(struct sk_buff *skb)
52{
53 struct ip_options *opt = &(IPCB(skb)->opt);
54
55 if (unlikely(opt->optlen))
56 return true;
57 if (skb_sec_path(skb))
58 return true;
59
60 return false;
61}
62
63static void nft_flow_offload_eval(const struct nft_expr *expr,
64 struct nft_regs *regs,
65 const struct nft_pktinfo *pkt)
66{
67 struct nft_flow_offload *priv = nft_expr_priv(expr);
68 struct nf_flowtable *flowtable = &priv->flowtable->data;
69 enum ip_conntrack_info ctinfo;
70 struct nf_flow_route route;
71 struct flow_offload *flow;
72 enum ip_conntrack_dir dir;
73 struct nf_conn *ct;
74 int ret;
75
76 if (nft_flow_offload_skip(pkt->skb))
77 goto out;
78
79 ct = nf_ct_get(pkt->skb, &ctinfo);
80 if (!ct)
81 goto out;
82
83 switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
84 case IPPROTO_TCP:
85 case IPPROTO_UDP:
86 break;
87 default:
88 goto out;
89 }
90
91 if (test_bit(IPS_HELPER_BIT, &ct->status))
92 goto out;
93
94 if (ctinfo == IP_CT_NEW ||
95 ctinfo == IP_CT_RELATED)
96 goto out;
97
98 if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
99 goto out;
100
101 dir = CTINFO2DIR(ctinfo);
102 if (nft_flow_route(pkt, ct, &route, dir) < 0)
103 goto err_flow_route;
104
105 flow = flow_offload_alloc(ct, &route);
106 if (!flow)
107 goto err_flow_alloc;
108
109 ret = flow_offload_add(flowtable, flow);
110 if (ret < 0)
111 goto err_flow_add;
112
113 return;
114
115err_flow_add:
116 flow_offload_free(flow);
117err_flow_alloc:
118 dst_release(route.tuple[!dir].dst);
119err_flow_route:
120 clear_bit(IPS_OFFLOAD_BIT, &ct->status);
121out:
122 regs->verdict.code = NFT_BREAK;
123}
124
125static int nft_flow_offload_validate(const struct nft_ctx *ctx,
126 const struct nft_expr *expr,
127 const struct nft_data **data)
128{
129 unsigned int hook_mask = (1 << NF_INET_FORWARD);
130
131 return nft_chain_validate_hooks(ctx->chain, hook_mask);
132}
133
134static int nft_flow_offload_init(const struct nft_ctx *ctx,
135 const struct nft_expr *expr,
136 const struct nlattr * const tb[])
137{
138 struct nft_flow_offload *priv = nft_expr_priv(expr);
139 u8 genmask = nft_genmask_next(ctx->net);
140 struct nft_flowtable *flowtable;
141
142 if (!tb[NFTA_FLOW_TABLE_NAME])
143 return -EINVAL;
144
145 flowtable = nf_tables_flowtable_lookup(ctx->table,
146 tb[NFTA_FLOW_TABLE_NAME],
147 genmask);
148 if (IS_ERR(flowtable))
149 return PTR_ERR(flowtable);
150
151 priv->flowtable = flowtable;
152 flowtable->use++;
153
154 return nf_ct_netns_get(ctx->net, ctx->family);
155}
156
157static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
158 const struct nft_expr *expr)
159{
160 struct nft_flow_offload *priv = nft_expr_priv(expr);
161
162 priv->flowtable->use--;
163 nf_ct_netns_put(ctx->net, ctx->family);
164}
165
166static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
167{
168 struct nft_flow_offload *priv = nft_expr_priv(expr);
169
170 if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
171 goto nla_put_failure;
172
173 return 0;
174
175nla_put_failure:
176 return -1;
177}
178
179static struct nft_expr_type nft_flow_offload_type;
180static const struct nft_expr_ops nft_flow_offload_ops = {
181 .type = &nft_flow_offload_type,
182 .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
183 .eval = nft_flow_offload_eval,
184 .init = nft_flow_offload_init,
185 .destroy = nft_flow_offload_destroy,
186 .validate = nft_flow_offload_validate,
187 .dump = nft_flow_offload_dump,
188};
189
190static struct nft_expr_type nft_flow_offload_type __read_mostly = {
191 .name = "flow_offload",
192 .ops = &nft_flow_offload_ops,
193 .maxattr = NFTA_FLOW_MAX,
194 .owner = THIS_MODULE,
195};
196
197static int flow_offload_netdev_event(struct notifier_block *this,
198 unsigned long event, void *ptr)
199{
200 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
201
202 if (event != NETDEV_DOWN)
203 return NOTIFY_DONE;
204
205 nf_flow_table_cleanup(dev_net(dev), dev);
206
207 return NOTIFY_DONE;
208}
209
210static struct notifier_block flow_offload_netdev_notifier = {
211 .notifier_call = flow_offload_netdev_event,
212};
213
214static int __init nft_flow_offload_module_init(void)
215{
216 int err;
217
218 register_netdevice_notifier(&flow_offload_netdev_notifier);
219
220 err = nft_register_expr(&nft_flow_offload_type);
221 if (err < 0)
222 goto register_expr;
223
224 return 0;
225
226register_expr:
227 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
228 return err;
229}
230
231static void __exit nft_flow_offload_module_exit(void)
232{
233 nft_unregister_expr(&nft_flow_offload_type);
234 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
235}
236
237module_init(nft_flow_offload_module_init);
238module_exit(nft_flow_offload_module_exit);
239
240MODULE_LICENSE("GPL");
241MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
242MODULE_ALIAS_NFT_EXPR("flow_offload");
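
For orientation, the expression pairs with a flowtable object configured from user space; an illustrative ruleset in the nft syntax of this era (not part of the kernel patch; device names hypothetical):

/*
 *	table inet filter {
 *		flowtable f {
 *			hook ingress priority 0; devices = { eth0, eth1 };
 *		}
 *		chain forward {
 *			type filter hook forward priority 0; policy accept;
 *			ip protocol { tcp, udp } flow offload @f
 *		}
 *	}
 */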
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 6f6e64423643..a27be36dc0af 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -112,7 +112,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
112 break; 112 break;
113 } 113 }
114 114
115 err = nf_logger_find_get(ctx->afi->family, li->type); 115 err = nf_logger_find_get(ctx->family, li->type);
116 if (err < 0) 116 if (err < 0)
117 goto err1; 117 goto err1;
118 118
@@ -133,7 +133,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
133 if (priv->prefix != nft_log_null_prefix) 133 if (priv->prefix != nft_log_null_prefix)
134 kfree(priv->prefix); 134 kfree(priv->prefix);
135 135
136 nf_logger_put(ctx->afi->family, li->type); 136 nf_logger_put(ctx->family, li->type);
137} 137}
138 138
139static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) 139static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 6ac03d4266c9..9d8655bc1bea 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
73 } 73 }
74 } 74 }
75 75
76 return nf_ct_netns_get(ctx->net, ctx->afi->family); 76 return nf_ct_netns_get(ctx->net, ctx->family);
77} 77}
78EXPORT_SYMBOL_GPL(nft_masq_init); 78EXPORT_SYMBOL_GPL(nft_masq_init);
79 79
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5a60eb23a7ed..8fb91940e2e7 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -210,6 +210,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
210 *dest = prandom_u32_state(state); 210 *dest = prandom_u32_state(state);
211 break; 211 break;
212 } 212 }
213#ifdef CONFIG_XFRM
214 case NFT_META_SECPATH:
215 nft_reg_store8(dest, !!skb->sp);
216 break;
217#endif
213 default: 218 default:
214 WARN_ON(1); 219 WARN_ON(1);
215 goto err; 220 goto err;
@@ -308,6 +313,11 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
308 prandom_init_once(&nft_prandom_state); 313 prandom_init_once(&nft_prandom_state);
309 len = sizeof(u32); 314 len = sizeof(u32);
310 break; 315 break;
316#ifdef CONFIG_XFRM
317 case NFT_META_SECPATH:
318 len = sizeof(u8);
319 break;
320#endif
311 default: 321 default:
312 return -EOPNOTSUPP; 322 return -EOPNOTSUPP;
313 } 323 }
@@ -318,6 +328,38 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
318} 328}
319EXPORT_SYMBOL_GPL(nft_meta_get_init); 329EXPORT_SYMBOL_GPL(nft_meta_get_init);
320 330
331static int nft_meta_get_validate(const struct nft_ctx *ctx,
332 const struct nft_expr *expr,
333 const struct nft_data **data)
334{
335#ifdef CONFIG_XFRM
336 const struct nft_meta *priv = nft_expr_priv(expr);
337 unsigned int hooks;
338
339 if (priv->key != NFT_META_SECPATH)
340 return 0;
341
342 switch (ctx->family) {
343 case NFPROTO_NETDEV:
344 hooks = 1 << NF_NETDEV_INGRESS;
345 break;
346 case NFPROTO_IPV4:
347 case NFPROTO_IPV6:
348 case NFPROTO_INET:
349 hooks = (1 << NF_INET_PRE_ROUTING) |
350 (1 << NF_INET_LOCAL_IN) |
351 (1 << NF_INET_FORWARD);
352 break;
353 default:
354 return -EOPNOTSUPP;
355 }
356
357 return nft_chain_validate_hooks(ctx->chain, hooks);
358#else
359 return 0;
360#endif
361}
362
321int nft_meta_set_validate(const struct nft_ctx *ctx, 363int nft_meta_set_validate(const struct nft_ctx *ctx,
322 const struct nft_expr *expr, 364 const struct nft_expr *expr,
323 const struct nft_data **data) 365 const struct nft_data **data)
@@ -328,7 +370,7 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
328 if (priv->key != NFT_META_PKTTYPE) 370 if (priv->key != NFT_META_PKTTYPE)
329 return 0; 371 return 0;
330 372
331 switch (ctx->afi->family) { 373 switch (ctx->family) {
332 case NFPROTO_BRIDGE: 374 case NFPROTO_BRIDGE:
333 hooks = 1 << NF_BR_PRE_ROUTING; 375 hooks = 1 << NF_BR_PRE_ROUTING;
334 break; 376 break;
@@ -434,6 +476,7 @@ static const struct nft_expr_ops nft_meta_get_ops = {
434 .eval = nft_meta_get_eval, 476 .eval = nft_meta_get_eval,
435 .init = nft_meta_get_init, 477 .init = nft_meta_get_init,
436 .dump = nft_meta_get_dump, 478 .dump = nft_meta_get_dump,
479 .validate = nft_meta_get_validate,
437}; 480};
438 481
439static const struct nft_expr_ops nft_meta_set_ops = { 482static const struct nft_expr_ops nft_meta_set_ops = {
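The NFT_META_SECPATH hunks above gate the new key on hook placement: nft_meta_get_validate() builds a bitmask of permitted hooks per family and hands it to nft_chain_validate_hooks(). A minimal userspace sketch of that bitmask idiom — the hook constants below are illustrative stand-ins, not the kernel's NF_INET_* values:

#include <stdio.h>

/* illustrative hook numbers, standing in for NF_INET_* */
enum { PRE_ROUTING, LOCAL_IN, FORWARD, LOCAL_OUT, POST_ROUTING };

static int validate_hook(unsigned int chain_hook, unsigned int allowed)
{
	/* reject unless the chain's hook bit is in the permitted set */
	return (allowed & (1U << chain_hook)) ? 0 : -1;
}

int main(void)
{
	unsigned int hooks = (1U << PRE_ROUTING) |
			     (1U << LOCAL_IN) |
			     (1U << FORWARD);

	printf("local-in:  %d\n", validate_hook(LOCAL_IN, hooks));  /* 0: allowed */
	printf("local-out: %d\n", validate_hook(LOCAL_OUT, hooks)); /* -1: rejected */
	return 0;
}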
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ed548d06b6dd..1f36954c2ba9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
142 return -EINVAL; 142 return -EINVAL;
143 143
144 family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); 144 family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
145 if (family != ctx->afi->family) 145 if (family != ctx->family)
146 return -EOPNOTSUPP; 146 return -EOPNOTSUPP;
147 147
148 switch (family) { 148 switch (family) {
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 1e66538bf0ff..c64cbe78dee7 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx *ctx,
75 return -EINVAL; 75 return -EINVAL;
76 } 76 }
77 77
78 return nf_ct_netns_get(ctx->net, ctx->afi->family); 78 return nf_ct_netns_get(ctx->net, ctx->family);
79} 79}
80EXPORT_SYMBOL_GPL(nft_redir_init); 80EXPORT_SYMBOL_GPL(nft_redir_init);
81 81
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index a6b7d05aeacf..11a2071b6dd4 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
27{ 27{
28 u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst); 28 u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
29 const struct sk_buff *skb = pkt->skb; 29 const struct sk_buff *skb = pkt->skb;
30 const struct nf_afinfo *ai; 30 struct dst_entry *dst = NULL;
31 struct flowi fl; 31 struct flowi fl;
32 32
33 memset(&fl, 0, sizeof(fl)); 33 memset(&fl, 0, sizeof(fl));
@@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
43 break; 43 break;
44 } 44 }
45 45
46 ai = nf_get_afinfo(nft_pf(pkt)); 46 nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
47 if (ai) { 47 if (dst) {
48 struct dst_entry *dst = NULL; 48 mtu = min(mtu, dst_mtu(dst));
49 49 dst_release(dst);
50 ai->route(nft_net(pkt), &dst, &fl, false);
51 if (dst) {
52 mtu = min(mtu, dst_mtu(dst));
53 dst_release(dst);
54 }
55 } 50 }
56 51
57 if (mtu <= minlen || mtu > 0xffff) 52 if (mtu <= minlen || mtu > 0xffff)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index f8166c1d5430..3f1624ee056f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -251,11 +251,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
251 if (err) 251 if (err)
252 return; 252 return;
253 253
254 err = rhashtable_walk_start(&hti); 254 rhashtable_walk_start(&hti);
255 if (err && err != -EAGAIN) {
256 iter->err = err;
257 goto out;
258 }
259 255
260 while ((he = rhashtable_walk_next(&hti))) { 256 while ((he = rhashtable_walk_next(&hti))) {
261 if (IS_ERR(he)) { 257 if (IS_ERR(he)) {
@@ -306,9 +302,7 @@ static void nft_rhash_gc(struct work_struct *work)
306 if (err) 302 if (err)
307 goto schedule; 303 goto schedule;
308 304
309 err = rhashtable_walk_start(&hti); 305 rhashtable_walk_start(&hti);
310 if (err && err != -EAGAIN)
311 goto out;
312 306
313 while ((he = rhashtable_walk_next(&hti))) { 307 while ((he = rhashtable_walk_next(&hti))) {
314 if (IS_ERR(he)) { 308 if (IS_ERR(he)) {
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
new file mode 100644
index 000000000000..0b660c568156
--- /dev/null
+++ b/net/netfilter/utils.c
@@ -0,0 +1,90 @@
1#include <linux/kernel.h>
2#include <linux/netfilter.h>
3#include <linux/netfilter_ipv4.h>
4#include <linux/netfilter_ipv6.h>
5#include <net/netfilter/nf_queue.h>
6
7__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
8 unsigned int dataoff, u_int8_t protocol,
9 unsigned short family)
10{
11 const struct nf_ipv6_ops *v6ops;
12 __sum16 csum = 0;
13
14 switch (family) {
15 case AF_INET:
16 csum = nf_ip_checksum(skb, hook, dataoff, protocol);
17 break;
18 case AF_INET6:
19 v6ops = rcu_dereference(nf_ipv6_ops);
20 if (v6ops)
21 csum = v6ops->checksum(skb, hook, dataoff, protocol);
22 break;
23 }
24
25 return csum;
26}
27EXPORT_SYMBOL_GPL(nf_checksum);
28
29__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
30 unsigned int dataoff, unsigned int len,
31 u_int8_t protocol, unsigned short family)
32{
33 const struct nf_ipv6_ops *v6ops;
34 __sum16 csum = 0;
35
36 switch (family) {
37 case AF_INET:
38 csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
39 protocol);
40 break;
41 case AF_INET6:
42 v6ops = rcu_dereference(nf_ipv6_ops);
43 if (v6ops)
44 csum = v6ops->checksum_partial(skb, hook, dataoff, len,
45 protocol);
46 break;
47 }
48
49 return csum;
50}
51EXPORT_SYMBOL_GPL(nf_checksum_partial);
52
53int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
54 bool strict, unsigned short family)
55{
56 const struct nf_ipv6_ops *v6ops;
57 int ret = 0;
58
59 switch (family) {
60 case AF_INET:
61 ret = nf_ip_route(net, dst, fl, strict);
62 break;
63 case AF_INET6:
64 v6ops = rcu_dereference(nf_ipv6_ops);
65 if (v6ops)
66 ret = v6ops->route(net, dst, fl, strict);
67 break;
68 }
69
70 return ret;
71}
72EXPORT_SYMBOL_GPL(nf_route);
73
74int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
75{
76 const struct nf_ipv6_ops *v6ops;
77 int ret = 0;
78
79 switch (entry->state.pf) {
80 case AF_INET:
81 ret = nf_ip_reroute(skb, entry);
82 break;
83 case AF_INET6:
84 v6ops = rcu_dereference(nf_ipv6_ops);
85 if (v6ops)
86 ret = v6ops->reroute(skb, entry);
87 break;
88 }
89 return ret;
90}
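The new utils.c routes every AF_INET call to the built-in IPv4 helper and every AF_INET6 call through the nf_ipv6_ops pointer, which may be NULL when IPv6 support is absent — that NULL tolerance is what lets IPv6 stay modular. A userspace sketch of the same dispatch shape; ipv6_ops, ip_checksum and the rest are stand-ins, not kernel symbols:

#include <stdio.h>
#include <stddef.h>

struct ipv6_ops {
	int (*checksum)(int hook);
};

static int ip_checksum(int hook)  { return 100 + hook; } /* built-in v4 path */
static int ip6_checksum(int hook) { return 600 + hook; } /* loadable v6 path */

static const struct ipv6_ops *v6ops; /* NULL until the "module" registers */

static int nf_checksum_demo(int family, int hook)
{
	int csum = 0;

	switch (family) {
	case 4:
		csum = ip_checksum(hook);
		break;
	case 6:
		if (v6ops) /* tolerate IPv6 support being absent */
			csum = v6ops->checksum(hook);
		break;
	}
	return csum;
}

int main(void)
{
	static const struct ipv6_ops ops = { .checksum = ip6_checksum };

	printf("%d\n", nf_checksum_demo(6, 1)); /* 0: v6 not registered yet */
	v6ops = &ops;
	printf("%d\n", nf_checksum_demo(6, 1)); /* 601: dispatched via ops */
	return 0;
}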
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 55802e97f906..2f685ee1f9c8 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -39,7 +39,6 @@ MODULE_LICENSE("GPL");
39MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 39MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
40MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); 40MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
41 41
42#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
43#define XT_PCPU_BLOCK_SIZE 4096 42#define XT_PCPU_BLOCK_SIZE 4096
44 43
45struct compat_delta { 44struct compat_delta {
@@ -210,6 +209,9 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
210{ 209{
211 struct xt_match *match; 210 struct xt_match *match;
212 211
212 if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
213 return ERR_PTR(-EINVAL);
214
213 match = xt_find_match(nfproto, name, revision); 215 match = xt_find_match(nfproto, name, revision);
214 if (IS_ERR(match)) { 216 if (IS_ERR(match)) {
215 request_module("%st_%s", xt_prefix[nfproto], name); 217 request_module("%st_%s", xt_prefix[nfproto], name);
@@ -252,6 +254,9 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
252{ 254{
253 struct xt_target *target; 255 struct xt_target *target;
254 256
257 if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
258 return ERR_PTR(-EINVAL);
259
255 target = xt_find_target(af, name, revision); 260 target = xt_find_target(af, name, revision);
256 if (IS_ERR(target)) { 261 if (IS_ERR(target)) {
257 request_module("%st_%s", xt_prefix[af], name); 262 request_module("%st_%s", xt_prefix[af], name);
@@ -1000,10 +1005,15 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
1000 return NULL; 1005 return NULL;
1001 1006
1002 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ 1007 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1003 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) 1008 if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
1004 return NULL; 1009 return NULL;
1005 1010
1006 info = kvmalloc(sz, GFP_KERNEL); 1011 /* __GFP_NORETRY is not fully supported by kvmalloc but it should
1012 * work reasonably well if sz is too large and bail out rather
1013 * than shoot all processes down before realizing there is nothing
1014 * more to reclaim.
1015 */
1016 info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY);
1007 if (!info) 1017 if (!info)
1008 return NULL; 1018 return NULL;
1009 1019
@@ -1027,7 +1037,7 @@ void xt_free_table_info(struct xt_table_info *info)
1027} 1037}
1028EXPORT_SYMBOL(xt_free_table_info); 1038EXPORT_SYMBOL(xt_free_table_info);
1029 1039
1030/* Find table by name, grabs mutex & ref. Returns NULL on error. */ 1040/* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */
1031struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, 1041struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
1032 const char *name) 1042 const char *name)
1033{ 1043{
@@ -1043,17 +1053,17 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
1043 1053
1044 /* Table doesn't exist in this netns, re-try init */ 1054 /* Table doesn't exist in this netns, re-try init */
1045 list_for_each_entry(t, &init_net.xt.tables[af], list) { 1055 list_for_each_entry(t, &init_net.xt.tables[af], list) {
1056 int err;
1057
1046 if (strcmp(t->name, name)) 1058 if (strcmp(t->name, name))
1047 continue; 1059 continue;
1048 if (!try_module_get(t->me)) { 1060 if (!try_module_get(t->me))
1049 mutex_unlock(&xt[af].mutex); 1061 goto out;
1050 return NULL;
1051 }
1052
1053 mutex_unlock(&xt[af].mutex); 1062 mutex_unlock(&xt[af].mutex);
1054 if (t->table_init(net) != 0) { 1063 err = t->table_init(net);
1064 if (err < 0) {
1055 module_put(t->me); 1065 module_put(t->me);
1056 return NULL; 1066 return ERR_PTR(err);
1057 } 1067 }
1058 1068
1059 found = t; 1069 found = t;
@@ -1073,10 +1083,28 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
1073 module_put(found->me); 1083 module_put(found->me);
1074 out: 1084 out:
1075 mutex_unlock(&xt[af].mutex); 1085 mutex_unlock(&xt[af].mutex);
1076 return NULL; 1086 return ERR_PTR(-ENOENT);
1077} 1087}
1078EXPORT_SYMBOL_GPL(xt_find_table_lock); 1088EXPORT_SYMBOL_GPL(xt_find_table_lock);
1079 1089
1090struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
1091 const char *name)
1092{
1093 struct xt_table *t = xt_find_table_lock(net, af, name);
1094
1095#ifdef CONFIG_MODULES
1096 if (IS_ERR(t)) {
1097 int err = request_module("%stable_%s", xt_prefix[af], name);
1098 if (err < 0)
1099 return ERR_PTR(err);
1100 t = xt_find_table_lock(net, af, name);
1101 }
1102#endif
1103
1104 return t;
1105}
1106EXPORT_SYMBOL_GPL(xt_request_find_table_lock);
1107
1080void xt_table_unlock(struct xt_table *table) 1108void xt_table_unlock(struct xt_table *table)
1081{ 1109{
1082 mutex_unlock(&xt[table->af].mutex); 1110 mutex_unlock(&xt[table->af].mutex);
@@ -1344,7 +1372,6 @@ static int xt_table_open(struct inode *inode, struct file *file)
1344} 1372}
1345 1373
1346static const struct file_operations xt_table_ops = { 1374static const struct file_operations xt_table_ops = {
1347 .owner = THIS_MODULE,
1348 .open = xt_table_open, 1375 .open = xt_table_open,
1349 .read = seq_read, 1376 .read = seq_read,
1350 .llseek = seq_lseek, 1377 .llseek = seq_lseek,
@@ -1397,7 +1424,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
1397 trav->curr = trav->curr->next; 1424 trav->curr = trav->curr->next;
1398 if (trav->curr != trav->head) 1425 if (trav->curr != trav->head)
1399 break; 1426 break;
1400 /* fallthru, _stop will unlock */ 1427 /* fall through */
1401 default: 1428 default:
1402 return NULL; 1429 return NULL;
1403 } 1430 }
@@ -1480,7 +1507,6 @@ static int xt_match_open(struct inode *inode, struct file *file)
1480} 1507}
1481 1508
1482static const struct file_operations xt_match_ops = { 1509static const struct file_operations xt_match_ops = {
1483 .owner = THIS_MODULE,
1484 .open = xt_match_open, 1510 .open = xt_match_open,
1485 .read = seq_read, 1511 .read = seq_read,
1486 .llseek = seq_lseek, 1512 .llseek = seq_lseek,
@@ -1533,7 +1559,6 @@ static int xt_target_open(struct inode *inode, struct file *file)
1533} 1559}
1534 1560
1535static const struct file_operations xt_target_ops = { 1561static const struct file_operations xt_target_ops = {
1536 .owner = THIS_MODULE,
1537 .open = xt_target_open, 1562 .open = xt_target_open,
1538 .read = seq_read, 1563 .read = seq_read,
1539 .llseek = seq_lseek, 1564 .llseek = seq_lseek,
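xt_find_table_lock() now reports failures as ERR_PTR(-errno) rather than a bare NULL, which is what lets xt_request_find_table_lock() distinguish -ENOENT (worth a request_module retry) from other errors. A self-contained demo of the ERR_PTR/IS_ERR encoding; the helpers below mirror include/linux/err.h but are re-declared locally so the example runs on its own:

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	/* the top MAX_ERRNO addresses are reserved for encoded errors */
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

/* pretend lookup that fails the way the patched function now reports it */
static void *find_table(const char *name)
{
	(void)name;
	return ERR_PTR(-ENOENT);
}

int main(void)
{
	void *t = find_table("filter");

	if (IS_ERR(t))
		printf("lookup failed: %ld\n", PTR_ERR(t)); /* -2, i.e. -ENOENT */
	return 0;
}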
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index ee3421ad108d..6c2482b709b1 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -252,6 +252,7 @@ static struct xt_target idletimer_tg __read_mostly = {
252 .family = NFPROTO_UNSPEC, 252 .family = NFPROTO_UNSPEC,
253 .target = idletimer_tg_target, 253 .target = idletimer_tg_target,
254 .targetsize = sizeof(struct idletimer_tg_info), 254 .targetsize = sizeof(struct idletimer_tg_info),
255 .usersize = offsetof(struct idletimer_tg_info, timer),
255 .checkentry = idletimer_tg_checkentry, 256 .checkentry = idletimer_tg_checkentry,
256 .destroy = idletimer_tg_destroy, 257 .destroy = idletimer_tg_destroy,
257 .me = THIS_MODULE, 258 .me = THIS_MODULE,
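This .usersize addition, like the ones in the xt_LED, xt_nfacct, xt_statistic and xt_limit hunks below, expresses one rule: when a match/target info block is copied back to userspace, the copy stops at the first kernel-private member. A standalone sketch of that semantics, with an invented struct layout for illustration:

#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct tg_info {
	char label[24];   /* user-visible configuration */
	void *timer;      /* kernel-private state; must not leak out */
};

int main(void)
{
	struct tg_info in = { .label = "idle", .timer = (void *)0x1 };
	struct tg_info out;
	size_t usersize = offsetof(struct tg_info, timer);

	memset(&out, 0, sizeof(out));
	memcpy(&out, &in, usersize);  /* the copy stops before the private field */
	printf("label=%s timer=%p\n", out.label, out.timer); /* timer stays NULL */
	return 0;
}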
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 0971634e5444..1dcad893df78 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -198,6 +198,7 @@ static struct xt_target led_tg_reg __read_mostly = {
198 .family = NFPROTO_UNSPEC, 198 .family = NFPROTO_UNSPEC,
199 .target = led_tg, 199 .target = led_tg,
200 .targetsize = sizeof(struct xt_led_info), 200 .targetsize = sizeof(struct xt_led_info),
201 .usersize = offsetof(struct xt_led_info, internal_data),
201 .checkentry = led_tg_check, 202 .checkentry = led_tg_check,
202 .destroy = led_tg_destroy, 203 .destroy = led_tg_destroy,
203 .me = THIS_MODULE, 204 .me = THIS_MODULE,
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 498b54fd04d7..141c295191f6 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -39,23 +39,31 @@ static void xt_rateest_hash_insert(struct xt_rateest *est)
39 hlist_add_head(&est->list, &rateest_hash[h]); 39 hlist_add_head(&est->list, &rateest_hash[h]);
40} 40}
41 41
42struct xt_rateest *xt_rateest_lookup(const char *name) 42static struct xt_rateest *__xt_rateest_lookup(const char *name)
43{ 43{
44 struct xt_rateest *est; 44 struct xt_rateest *est;
45 unsigned int h; 45 unsigned int h;
46 46
47 h = xt_rateest_hash(name); 47 h = xt_rateest_hash(name);
48 mutex_lock(&xt_rateest_mutex);
49 hlist_for_each_entry(est, &rateest_hash[h], list) { 48 hlist_for_each_entry(est, &rateest_hash[h], list) {
50 if (strcmp(est->name, name) == 0) { 49 if (strcmp(est->name, name) == 0) {
51 est->refcnt++; 50 est->refcnt++;
52 mutex_unlock(&xt_rateest_mutex);
53 return est; 51 return est;
54 } 52 }
55 } 53 }
56 mutex_unlock(&xt_rateest_mutex); 54
57 return NULL; 55 return NULL;
58} 56}
57
58struct xt_rateest *xt_rateest_lookup(const char *name)
59{
60 struct xt_rateest *est;
61
62 mutex_lock(&xt_rateest_mutex);
63 est = __xt_rateest_lookup(name);
64 mutex_unlock(&xt_rateest_mutex);
65 return est;
66}
59EXPORT_SYMBOL_GPL(xt_rateest_lookup); 67EXPORT_SYMBOL_GPL(xt_rateest_lookup);
60 68
61void xt_rateest_put(struct xt_rateest *est) 69void xt_rateest_put(struct xt_rateest *est)
@@ -100,8 +108,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
100 108
101 net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); 109 net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
102 110
103 est = xt_rateest_lookup(info->name); 111 mutex_lock(&xt_rateest_mutex);
112 est = __xt_rateest_lookup(info->name);
104 if (est) { 113 if (est) {
114 mutex_unlock(&xt_rateest_mutex);
105 /* 115 /*
106 * If estimator parameters are specified, they must match the 116 * If estimator parameters are specified, they must match the
107 * existing estimator. 117 * existing estimator.
@@ -139,11 +149,13 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
139 149
140 info->est = est; 150 info->est = est;
141 xt_rateest_hash_insert(est); 151 xt_rateest_hash_insert(est);
152 mutex_unlock(&xt_rateest_mutex);
142 return 0; 153 return 0;
143 154
144err2: 155err2:
145 kfree(est); 156 kfree(est);
146err1: 157err1:
158 mutex_unlock(&xt_rateest_mutex);
147 return ret; 159 return ret;
148} 160}
149 161
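The RATEEST change splits out a lock-free __xt_rateest_lookup() so that checkentry can hold xt_rateest_mutex across both the miss and the insert, closing the window where two concurrent checkentry calls could each miss and each allocate. A pthread sketch of the same check-and-insert discipline; the list and struct are illustrative, not the kernel's hash:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct est { char name[32]; int refcnt; struct est *next; };

static struct est *head;
static pthread_mutex_t est_mutex = PTHREAD_MUTEX_INITIALIZER;

/* caller must hold est_mutex, mirroring __xt_rateest_lookup() */
static struct est *__lookup(const char *name)
{
	for (struct est *e = head; e; e = e->next) {
		if (strcmp(e->name, name) == 0) {
			e->refcnt++;
			return e;
		}
	}
	return NULL;
}

static struct est *get_or_create(const char *name)
{
	struct est *e;

	pthread_mutex_lock(&est_mutex);
	e = __lookup(name);
	if (!e) {                       /* miss: insert while still locked */
		e = calloc(1, sizeof(*e));
		if (e) {
			snprintf(e->name, sizeof(e->name), "%s", name);
			e->refcnt = 1;
			e->next = head;
			head = e;
		}
	}
	pthread_mutex_unlock(&est_mutex);
	return e;
}

int main(void)
{
	struct est *a = get_or_create("uplink");
	struct est *b = get_or_create("uplink");

	printf("same object: %d, refcnt: %d\n", a == b, a ? a->refcnt : 0); /* 1, 2 */
	return 0;
}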
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9dae4d665965..99bb8e410f22 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
48 unsigned int family) 48 unsigned int family)
49{ 49{
50 struct flowi fl; 50 struct flowi fl;
51 const struct nf_afinfo *ai;
52 struct rtable *rt = NULL; 51 struct rtable *rt = NULL;
53 u_int32_t mtu = ~0U; 52 u_int32_t mtu = ~0U;
54 53
@@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
62 memset(fl6, 0, sizeof(*fl6)); 61 memset(fl6, 0, sizeof(*fl6));
63 fl6->daddr = ipv6_hdr(skb)->saddr; 62 fl6->daddr = ipv6_hdr(skb)->saddr;
64 } 63 }
65 ai = nf_get_afinfo(family);
66 if (ai != NULL)
67 ai->route(net, (struct dst_entry **)&rt, &fl, false);
68 64
65 nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
69 if (rt != NULL) { 66 if (rt != NULL) {
70 mtu = dst_mtu(&rt->dst); 67 mtu = dst_mtu(&rt->dst);
71 dst_release(&rt->dst); 68 dst_release(&rt->dst);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 3b2be2ae6987..911a7c0da504 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype");
36static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, 36static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
37 const struct in6_addr *addr, u16 mask) 37 const struct in6_addr *addr, u16 mask)
38{ 38{
39 const struct nf_afinfo *afinfo; 39 const struct nf_ipv6_ops *v6ops;
40 struct flowi6 flow; 40 struct flowi6 flow;
41 struct rt6_info *rt; 41 struct rt6_info *rt;
42 u32 ret = 0; 42 u32 ret = 0;
@@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
47 if (dev) 47 if (dev)
48 flow.flowi6_oif = dev->ifindex; 48 flow.flowi6_oif = dev->ifindex;
49 49
50 afinfo = nf_get_afinfo(NFPROTO_IPV6); 50 v6ops = nf_get_ipv6_ops();
51 if (afinfo != NULL) { 51 if (v6ops) {
52 const struct nf_ipv6_ops *v6ops;
53
54 if (dev && (mask & XT_ADDRTYPE_LOCAL)) { 52 if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
55 v6ops = nf_get_ipv6_ops(); 53 if (v6ops->chk_addr(net, addr, dev, true))
56 if (v6ops && v6ops->chk_addr(net, addr, dev, true))
57 ret = XT_ADDRTYPE_LOCAL; 54 ret = XT_ADDRTYPE_LOCAL;
58 } 55 }
59 route_err = afinfo->route(net, (struct dst_entry **)&rt, 56 route_err = v6ops->route(net, (struct dst_entry **)&rt,
60 flowi6_to_flowi(&flow), false); 57 flowi6_to_flowi(&flow), false);
61 } else { 58 } else {
62 route_err = 1; 59 route_err = 1;
63 } 60 }
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 1db1ce59079f..891f4e7e8ea7 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -52,6 +52,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
52 return -EINVAL; 52 return -EINVAL;
53 } 53 }
54 54
55 info->priv = NULL;
55 if (info->has_path) { 56 if (info->has_path) {
56 cgrp = cgroup_get_from_path(info->path); 57 cgrp = cgroup_get_from_path(info->path);
57 if (IS_ERR(cgrp)) { 58 if (IS_ERR(cgrp)) {
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index a6214f235333..b1b17b9353e1 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -12,292 +12,30 @@
12 * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). 12 * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
13 */ 13 */
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15#include <linux/in.h> 15
16#include <linux/in6.h>
17#include <linux/ip.h>
18#include <linux/ipv6.h>
19#include <linux/jhash.h>
20#include <linux/slab.h>
21#include <linux/list.h>
22#include <linux/rbtree.h>
23#include <linux/module.h> 16#include <linux/module.h>
24#include <linux/random.h>
25#include <linux/skbuff.h> 17#include <linux/skbuff.h>
26#include <linux/spinlock.h>
27#include <linux/netfilter/nf_conntrack_tcp.h>
28#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
29#include <linux/netfilter/xt_connlimit.h> 19#include <linux/netfilter/xt_connlimit.h>
20
30#include <net/netfilter/nf_conntrack.h> 21#include <net/netfilter/nf_conntrack.h>
31#include <net/netfilter/nf_conntrack_core.h> 22#include <net/netfilter/nf_conntrack_core.h>
32#include <net/netfilter/nf_conntrack_tuple.h> 23#include <net/netfilter/nf_conntrack_tuple.h>
33#include <net/netfilter/nf_conntrack_zones.h> 24#include <net/netfilter/nf_conntrack_zones.h>
34 25#include <net/netfilter/nf_conntrack_count.h>
35#define CONNLIMIT_SLOTS 256U
36
37#ifdef CONFIG_LOCKDEP
38#define CONNLIMIT_LOCK_SLOTS 8U
39#else
40#define CONNLIMIT_LOCK_SLOTS 256U
41#endif
42
43#define CONNLIMIT_GC_MAX_NODES 8
44
45/* we will save the tuples of all connections we care about */
46struct xt_connlimit_conn {
47 struct hlist_node node;
48 struct nf_conntrack_tuple tuple;
49};
50
51struct xt_connlimit_rb {
52 struct rb_node node;
53 struct hlist_head hhead; /* connections/hosts in same subnet */
54 union nf_inet_addr addr; /* search key */
55};
56
57static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
58
59struct xt_connlimit_data {
60 struct rb_root climit_root[CONNLIMIT_SLOTS];
61};
62
63static u_int32_t connlimit_rnd __read_mostly;
64static struct kmem_cache *connlimit_rb_cachep __read_mostly;
65static struct kmem_cache *connlimit_conn_cachep __read_mostly;
66
67static inline unsigned int connlimit_iphash(__be32 addr)
68{
69 return jhash_1word((__force __u32)addr,
70 connlimit_rnd) % CONNLIMIT_SLOTS;
71}
72
73static inline unsigned int
74connlimit_iphash6(const union nf_inet_addr *addr)
75{
76 return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
77 connlimit_rnd) % CONNLIMIT_SLOTS;
78}
79
80static inline bool already_closed(const struct nf_conn *conn)
81{
82 if (nf_ct_protonum(conn) == IPPROTO_TCP)
83 return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
84 conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
85 else
86 return 0;
87}
88
89static int
90same_source(const union nf_inet_addr *addr,
91 const union nf_inet_addr *u3, u_int8_t family)
92{
93 if (family == NFPROTO_IPV4)
94 return ntohl(addr->ip) - ntohl(u3->ip);
95
96 return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
97}
98
99static bool add_hlist(struct hlist_head *head,
100 const struct nf_conntrack_tuple *tuple,
101 const union nf_inet_addr *addr)
102{
103 struct xt_connlimit_conn *conn;
104
105 conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
106 if (conn == NULL)
107 return false;
108 conn->tuple = *tuple;
109 hlist_add_head(&conn->node, head);
110 return true;
111}
112
113static unsigned int check_hlist(struct net *net,
114 struct hlist_head *head,
115 const struct nf_conntrack_tuple *tuple,
116 const struct nf_conntrack_zone *zone,
117 bool *addit)
118{
119 const struct nf_conntrack_tuple_hash *found;
120 struct xt_connlimit_conn *conn;
121 struct hlist_node *n;
122 struct nf_conn *found_ct;
123 unsigned int length = 0;
124
125 *addit = true;
126
127 /* check the saved connections */
128 hlist_for_each_entry_safe(conn, n, head, node) {
129 found = nf_conntrack_find_get(net, zone, &conn->tuple);
130 if (found == NULL) {
131 hlist_del(&conn->node);
132 kmem_cache_free(connlimit_conn_cachep, conn);
133 continue;
134 }
135
136 found_ct = nf_ct_tuplehash_to_ctrack(found);
137
138 if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
139 /*
140 * Just to be sure we have it only once in the list.
141 * We should not see tuples twice unless someone hooks
142 * this into a table without "-p tcp --syn".
143 */
144 *addit = false;
145 } else if (already_closed(found_ct)) {
146 /*
147 * we do not care about connections which are
148 * closed already -> ditch it
149 */
150 nf_ct_put(found_ct);
151 hlist_del(&conn->node);
152 kmem_cache_free(connlimit_conn_cachep, conn);
153 continue;
154 }
155
156 nf_ct_put(found_ct);
157 length++;
158 }
159
160 return length;
161}
162
163static void tree_nodes_free(struct rb_root *root,
164 struct xt_connlimit_rb *gc_nodes[],
165 unsigned int gc_count)
166{
167 struct xt_connlimit_rb *rbconn;
168
169 while (gc_count) {
170 rbconn = gc_nodes[--gc_count];
171 rb_erase(&rbconn->node, root);
172 kmem_cache_free(connlimit_rb_cachep, rbconn);
173 }
174}
175
176static unsigned int
177count_tree(struct net *net, struct rb_root *root,
178 const struct nf_conntrack_tuple *tuple,
179 const union nf_inet_addr *addr,
180 u8 family, const struct nf_conntrack_zone *zone)
181{
182 struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
183 struct rb_node **rbnode, *parent;
184 struct xt_connlimit_rb *rbconn;
185 struct xt_connlimit_conn *conn;
186 unsigned int gc_count;
187 bool no_gc = false;
188
189 restart:
190 gc_count = 0;
191 parent = NULL;
192 rbnode = &(root->rb_node);
193 while (*rbnode) {
194 int diff;
195 bool addit;
196
197 rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
198
199 parent = *rbnode;
200 diff = same_source(addr, &rbconn->addr, family);
201 if (diff < 0) {
202 rbnode = &((*rbnode)->rb_left);
203 } else if (diff > 0) {
204 rbnode = &((*rbnode)->rb_right);
205 } else {
206 /* same source network -> be counted! */
207 unsigned int count;
208 count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
209
210 tree_nodes_free(root, gc_nodes, gc_count);
211 if (!addit)
212 return count;
213
214 if (!add_hlist(&rbconn->hhead, tuple, addr))
215 return 0; /* hotdrop */
216
217 return count + 1;
218 }
219
220 if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
221 continue;
222
223 /* only used for GC on hhead, retval and 'addit' ignored */
224 check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
225 if (hlist_empty(&rbconn->hhead))
226 gc_nodes[gc_count++] = rbconn;
227 }
228
229 if (gc_count) {
230 no_gc = true;
231 tree_nodes_free(root, gc_nodes, gc_count);
232 /* tree_node_free before new allocation permits
233 * allocator to re-use newly free'd object.
234 *
235 * This is a rare event; in most cases we will find
236 * existing node to re-use. (or gc_count is 0).
237 */
238 goto restart;
239 }
240
241 /* no match, need to insert new node */
242 rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
243 if (rbconn == NULL)
244 return 0;
245
246 conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
247 if (conn == NULL) {
248 kmem_cache_free(connlimit_rb_cachep, rbconn);
249 return 0;
250 }
251
252 conn->tuple = *tuple;
253 rbconn->addr = *addr;
254
255 INIT_HLIST_HEAD(&rbconn->hhead);
256 hlist_add_head(&conn->node, &rbconn->hhead);
257
258 rb_link_node(&rbconn->node, parent, rbnode);
259 rb_insert_color(&rbconn->node, root);
260 return 1;
261}
262
263static int count_them(struct net *net,
264 struct xt_connlimit_data *data,
265 const struct nf_conntrack_tuple *tuple,
266 const union nf_inet_addr *addr,
267 u_int8_t family,
268 const struct nf_conntrack_zone *zone)
269{
270 struct rb_root *root;
271 int count;
272 u32 hash;
273
274 if (family == NFPROTO_IPV6)
275 hash = connlimit_iphash6(addr);
276 else
277 hash = connlimit_iphash(addr->ip);
278 root = &data->climit_root[hash];
279
280 spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
281
282 count = count_tree(net, root, tuple, addr, family, zone);
283
284 spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
285
286 return count;
287}
288 26
289static bool 27static bool
290connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) 28connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
291{ 29{
292 struct net *net = xt_net(par); 30 struct net *net = xt_net(par);
293 const struct xt_connlimit_info *info = par->matchinfo; 31 const struct xt_connlimit_info *info = par->matchinfo;
294 union nf_inet_addr addr;
295 struct nf_conntrack_tuple tuple; 32 struct nf_conntrack_tuple tuple;
296 const struct nf_conntrack_tuple *tuple_ptr = &tuple; 33 const struct nf_conntrack_tuple *tuple_ptr = &tuple;
297 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 34 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
298 enum ip_conntrack_info ctinfo; 35 enum ip_conntrack_info ctinfo;
299 const struct nf_conn *ct; 36 const struct nf_conn *ct;
300 unsigned int connections; 37 unsigned int connections;
38 u32 key[5];
301 39
302 ct = nf_ct_get(skb, &ctinfo); 40 ct = nf_ct_get(skb, &ctinfo);
303 if (ct != NULL) { 41 if (ct != NULL) {
@@ -310,6 +48,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
310 48
311 if (xt_family(par) == NFPROTO_IPV6) { 49 if (xt_family(par) == NFPROTO_IPV6) {
312 const struct ipv6hdr *iph = ipv6_hdr(skb); 50 const struct ipv6hdr *iph = ipv6_hdr(skb);
51 union nf_inet_addr addr;
313 unsigned int i; 52 unsigned int i;
314 53
315 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? 54 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
@@ -317,22 +56,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
317 56
318 for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) 57 for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
319 addr.ip6[i] &= info->mask.ip6[i]; 58 addr.ip6[i] &= info->mask.ip6[i];
59 memcpy(key, &addr, sizeof(addr.ip6));
60 key[4] = zone->id;
320 } else { 61 } else {
321 const struct iphdr *iph = ip_hdr(skb); 62 const struct iphdr *iph = ip_hdr(skb);
322 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? 63 key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
323 iph->daddr : iph->saddr; 64 iph->daddr : iph->saddr;
324 65
325 addr.ip &= info->mask.ip; 66 key[0] &= info->mask.ip;
67 key[1] = zone->id;
326 } 68 }
327 69
328 connections = count_them(net, info->data, tuple_ptr, &addr, 70 connections = nf_conncount_count(net, info->data, key,
329 xt_family(par), zone); 71 xt_family(par), tuple_ptr, zone);
330 if (connections == 0) 72 if (connections == 0)
331 /* kmalloc failed, drop it entirely */ 73 /* kmalloc failed, drop it entirely */
332 goto hotdrop; 74 goto hotdrop;
333 75
334 return (connections > info->limit) ^ 76 return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
335 !!(info->flags & XT_CONNLIMIT_INVERT);
336 77
337 hotdrop: 78 hotdrop:
338 par->hotdrop = true; 79 par->hotdrop = true;
@@ -342,61 +83,27 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
342static int connlimit_mt_check(const struct xt_mtchk_param *par) 83static int connlimit_mt_check(const struct xt_mtchk_param *par)
343{ 84{
344 struct xt_connlimit_info *info = par->matchinfo; 85 struct xt_connlimit_info *info = par->matchinfo;
345 unsigned int i; 86 unsigned int keylen;
346 int ret;
347 87
348 net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); 88 keylen = sizeof(u32);
349 89 if (par->family == NFPROTO_IPV6)
350 ret = nf_ct_netns_get(par->net, par->family); 90 keylen += sizeof(struct in6_addr);
351 if (ret < 0) { 91 else
352 pr_info("cannot load conntrack support for " 92 keylen += sizeof(struct in_addr);
353 "address family %u\n", par->family);
354 return ret;
355 }
356 93
357 /* init private data */ 94 /* init private data */
358 info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); 95 info->data = nf_conncount_init(par->net, par->family, keylen);
359 if (info->data == NULL) { 96 if (IS_ERR(info->data))
360 nf_ct_netns_put(par->net, par->family); 97 return PTR_ERR(info->data);
361 return -ENOMEM;
362 }
363
364 for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
365 info->data->climit_root[i] = RB_ROOT;
366 98
367 return 0; 99 return 0;
368} 100}
369 101
370static void destroy_tree(struct rb_root *r)
371{
372 struct xt_connlimit_conn *conn;
373 struct xt_connlimit_rb *rbconn;
374 struct hlist_node *n;
375 struct rb_node *node;
376
377 while ((node = rb_first(r)) != NULL) {
378 rbconn = rb_entry(node, struct xt_connlimit_rb, node);
379
380 rb_erase(node, r);
381
382 hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
383 kmem_cache_free(connlimit_conn_cachep, conn);
384
385 kmem_cache_free(connlimit_rb_cachep, rbconn);
386 }
387}
388
389static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) 102static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
390{ 103{
391 const struct xt_connlimit_info *info = par->matchinfo; 104 const struct xt_connlimit_info *info = par->matchinfo;
392 unsigned int i;
393
394 nf_ct_netns_put(par->net, par->family);
395
396 for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
397 destroy_tree(&info->data->climit_root[i]);
398 105
399 kfree(info->data); 106 nf_conncount_destroy(par->net, par->family, info->data);
400} 107}
401 108
402static struct xt_match connlimit_mt_reg __read_mostly = { 109static struct xt_match connlimit_mt_reg __read_mostly = {
@@ -413,40 +120,12 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
413 120
414static int __init connlimit_mt_init(void) 121static int __init connlimit_mt_init(void)
415{ 122{
416 int ret, i; 123 return xt_register_match(&connlimit_mt_reg);
417
418 BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
419 BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
420
421 for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
422 spin_lock_init(&xt_connlimit_locks[i]);
423
424 connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
425 sizeof(struct xt_connlimit_conn),
426 0, 0, NULL);
427 if (!connlimit_conn_cachep)
428 return -ENOMEM;
429
430 connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
431 sizeof(struct xt_connlimit_rb),
432 0, 0, NULL);
433 if (!connlimit_rb_cachep) {
434 kmem_cache_destroy(connlimit_conn_cachep);
435 return -ENOMEM;
436 }
437 ret = xt_register_match(&connlimit_mt_reg);
438 if (ret != 0) {
439 kmem_cache_destroy(connlimit_conn_cachep);
440 kmem_cache_destroy(connlimit_rb_cachep);
441 }
442 return ret;
443} 124}
444 125
445static void __exit connlimit_mt_exit(void) 126static void __exit connlimit_mt_exit(void)
446{ 127{
447 xt_unregister_match(&connlimit_mt_reg); 128 xt_unregister_match(&connlimit_mt_reg);
448 kmem_cache_destroy(connlimit_conn_cachep);
449 kmem_cache_destroy(connlimit_rb_cachep);
450} 129}
451 130
452module_init(connlimit_mt_init); 131module_init(connlimit_mt_init);
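The rewritten match hands all tree and GC bookkeeping to nf_conncount and only builds a lookup key: the masked address plus the conntrack zone id, packed into a u32 array that the generic code hashes opaquely. A small sketch of the IPv4 key construction, with illustrative values:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t key[5] = { 0 };
	uint32_t saddr = htonl(0xc0a80142);    /* 192.168.1.66, illustrative */
	uint32_t mask  = htonl(0xffffff00);    /* /24 grouping */
	uint16_t zone_id = 0;

	key[0] = saddr & mask;   /* all hosts of the subnet share one counter */
	key[1] = zone_id;        /* distinct conntrack zones must not collide */

	printf("key[0]=%08x key[1]=%u\n", ntohl(key[0]), key[1]);
	return 0;
}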
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5da8746f7b88..ca6847403ca2 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -353,7 +353,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
353static bool select_all(const struct xt_hashlimit_htable *ht, 353static bool select_all(const struct xt_hashlimit_htable *ht,
354 const struct dsthash_ent *he) 354 const struct dsthash_ent *he)
355{ 355{
356 return 1; 356 return true;
357} 357}
358 358
359static bool select_gc(const struct xt_hashlimit_htable *ht, 359static bool select_gc(const struct xt_hashlimit_htable *ht,
@@ -1266,7 +1266,6 @@ static int dl_proc_open(struct inode *inode, struct file *file)
1266} 1266}
1267 1267
1268static const struct file_operations dl_file_ops_v2 = { 1268static const struct file_operations dl_file_ops_v2 = {
1269 .owner = THIS_MODULE,
1270 .open = dl_proc_open_v2, 1269 .open = dl_proc_open_v2,
1271 .read = seq_read, 1270 .read = seq_read,
1272 .llseek = seq_lseek, 1271 .llseek = seq_lseek,
@@ -1274,7 +1273,6 @@ static const struct file_operations dl_file_ops_v2 = {
1274}; 1273};
1275 1274
1276static const struct file_operations dl_file_ops_v1 = { 1275static const struct file_operations dl_file_ops_v1 = {
1277 .owner = THIS_MODULE,
1278 .open = dl_proc_open_v1, 1276 .open = dl_proc_open_v1,
1279 .read = seq_read, 1277 .read = seq_read,
1280 .llseek = seq_lseek, 1278 .llseek = seq_lseek,
@@ -1282,7 +1280,6 @@ static const struct file_operations dl_file_ops_v1 = {
1282}; 1280};
1283 1281
1284static const struct file_operations dl_file_ops = { 1282static const struct file_operations dl_file_ops = {
1285 .owner = THIS_MODULE,
1286 .open = dl_proc_open, 1283 .open = dl_proc_open,
1287 .read = seq_read, 1284 .read = seq_read,
1288 .llseek = seq_lseek, 1285 .llseek = seq_lseek,
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index 000e70377f85..7ca64a50db04 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -58,7 +58,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
58 */ 58 */
59 pr_debug("Dropping evil IPComp tinygram.\n"); 59 pr_debug("Dropping evil IPComp tinygram.\n");
60 par->hotdrop = true; 60 par->hotdrop = true;
61 return 0; 61 return false;
62 } 62 }
63 63
64 return spi_match(compinfo->spis[0], compinfo->spis[1], 64 return spi_match(compinfo->spis[0], compinfo->spis[1],
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index d27b5f1ea619..61403b77361c 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -193,9 +193,8 @@ static struct xt_match limit_mt_reg __read_mostly = {
193 .compatsize = sizeof(struct compat_xt_rateinfo), 193 .compatsize = sizeof(struct compat_xt_rateinfo),
194 .compat_from_user = limit_mt_compat_from_user, 194 .compat_from_user = limit_mt_compat_from_user,
195 .compat_to_user = limit_mt_compat_to_user, 195 .compat_to_user = limit_mt_compat_to_user,
196#else
197 .usersize = offsetof(struct xt_rateinfo, prev),
198#endif 196#endif
197 .usersize = offsetof(struct xt_rateinfo, prev),
199 .me = THIS_MODULE, 198 .me = THIS_MODULE,
200}; 199};
201 200
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index cc0518fe598e..6f92d25590a8 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -62,6 +62,7 @@ static struct xt_match nfacct_mt_reg __read_mostly = {
62 .match = nfacct_mt, 62 .match = nfacct_mt,
63 .destroy = nfacct_mt_destroy, 63 .destroy = nfacct_mt_destroy,
64 .matchsize = sizeof(struct xt_nfacct_match_info), 64 .matchsize = sizeof(struct xt_nfacct_match_info),
65 .usersize = offsetof(struct xt_nfacct_match_info, nfacct),
65 .me = THIS_MODULE, 66 .me = THIS_MODULE,
66}; 67};
67 68
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 2b4ab189bba7..5639fb03bdd9 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -93,7 +93,8 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
93 if (dst->xfrm == NULL) 93 if (dst->xfrm == NULL)
94 return -1; 94 return -1;
95 95
96 for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { 96 for (i = 0; dst && dst->xfrm;
97 dst = ((struct xfrm_dst *)dst)->child, i++) {
97 pos = strict ? i : 0; 98 pos = strict ? i : 0;
98 if (pos >= info->len) 99 if (pos >= info->len)
99 return 0; 100 return 0;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 64285702afd5..16b6b11ee83f 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -39,13 +39,17 @@ match_set(ip_set_id_t index, const struct sk_buff *skb,
39 return inv; 39 return inv;
40} 40}
41 41
42#define ADT_OPT(n, f, d, fs, cfs, t) \ 42#define ADT_OPT(n, f, d, fs, cfs, t, p, b, po, bo) \
43struct ip_set_adt_opt n = { \ 43struct ip_set_adt_opt n = { \
44 .family = f, \ 44 .family = f, \
45 .dim = d, \ 45 .dim = d, \
46 .flags = fs, \ 46 .flags = fs, \
47 .cmdflags = cfs, \ 47 .cmdflags = cfs, \
48 .ext.timeout = t, \ 48 .ext.timeout = t, \
49 .ext.packets = p, \
50 .ext.bytes = b, \
51 .ext.packets_op = po, \
52 .ext.bytes_op = bo, \
49} 53}
50 54
51/* Revision 0 interface: backward compatible with netfilter/iptables */ 55/* Revision 0 interface: backward compatible with netfilter/iptables */
@@ -56,7 +60,8 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
56 const struct xt_set_info_match_v0 *info = par->matchinfo; 60 const struct xt_set_info_match_v0 *info = par->matchinfo;
57 61
58 ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim, 62 ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
59 info->match_set.u.compat.flags, 0, UINT_MAX); 63 info->match_set.u.compat.flags, 0, UINT_MAX,
64 0, 0, 0, 0);
60 65
61 return match_set(info->match_set.index, skb, par, &opt, 66 return match_set(info->match_set.index, skb, par, &opt,
62 info->match_set.u.compat.flags & IPSET_INV_MATCH); 67 info->match_set.u.compat.flags & IPSET_INV_MATCH);
@@ -119,7 +124,8 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
119 const struct xt_set_info_match_v1 *info = par->matchinfo; 124 const struct xt_set_info_match_v1 *info = par->matchinfo;
120 125
121 ADT_OPT(opt, xt_family(par), info->match_set.dim, 126 ADT_OPT(opt, xt_family(par), info->match_set.dim,
122 info->match_set.flags, 0, UINT_MAX); 127 info->match_set.flags, 0, UINT_MAX,
128 0, 0, 0, 0);
123 129
124 if (opt.flags & IPSET_RETURN_NOMATCH) 130 if (opt.flags & IPSET_RETURN_NOMATCH)
125 opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; 131 opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
@@ -161,45 +167,21 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par)
161/* Revision 3 match */ 167/* Revision 3 match */
162 168
163static bool 169static bool
164match_counter0(u64 counter, const struct ip_set_counter_match0 *info)
165{
166 switch (info->op) {
167 case IPSET_COUNTER_NONE:
168 return true;
169 case IPSET_COUNTER_EQ:
170 return counter == info->value;
171 case IPSET_COUNTER_NE:
172 return counter != info->value;
173 case IPSET_COUNTER_LT:
174 return counter < info->value;
175 case IPSET_COUNTER_GT:
176 return counter > info->value;
177 }
178 return false;
179}
180
181static bool
182set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) 170set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
183{ 171{
184 const struct xt_set_info_match_v3 *info = par->matchinfo; 172 const struct xt_set_info_match_v3 *info = par->matchinfo;
185 int ret;
186 173
187 ADT_OPT(opt, xt_family(par), info->match_set.dim, 174 ADT_OPT(opt, xt_family(par), info->match_set.dim,
188 info->match_set.flags, info->flags, UINT_MAX); 175 info->match_set.flags, info->flags, UINT_MAX,
176 info->packets.value, info->bytes.value,
177 info->packets.op, info->bytes.op);
189 178
190 if (info->packets.op != IPSET_COUNTER_NONE || 179 if (info->packets.op != IPSET_COUNTER_NONE ||
191 info->bytes.op != IPSET_COUNTER_NONE) 180 info->bytes.op != IPSET_COUNTER_NONE)
192 opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; 181 opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
193 182
194 ret = match_set(info->match_set.index, skb, par, &opt, 183 return match_set(info->match_set.index, skb, par, &opt,
195 info->match_set.flags & IPSET_INV_MATCH); 184 info->match_set.flags & IPSET_INV_MATCH);
196
197 if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
198 return ret;
199
200 if (!match_counter0(opt.ext.packets, &info->packets))
201 return false;
202 return match_counter0(opt.ext.bytes, &info->bytes);
203} 185}
204 186
205#define set_match_v3_checkentry set_match_v1_checkentry 187#define set_match_v3_checkentry set_match_v1_checkentry
@@ -208,45 +190,21 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
208/* Revision 4 match */ 190/* Revision 4 match */
209 191
210static bool 192static bool
211match_counter(u64 counter, const struct ip_set_counter_match *info)
212{
213 switch (info->op) {
214 case IPSET_COUNTER_NONE:
215 return true;
216 case IPSET_COUNTER_EQ:
217 return counter == info->value;
218 case IPSET_COUNTER_NE:
219 return counter != info->value;
220 case IPSET_COUNTER_LT:
221 return counter < info->value;
222 case IPSET_COUNTER_GT:
223 return counter > info->value;
224 }
225 return false;
226}
227
228static bool
229set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) 193set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
230{ 194{
231 const struct xt_set_info_match_v4 *info = par->matchinfo; 195 const struct xt_set_info_match_v4 *info = par->matchinfo;
232 int ret;
233 196
234 ADT_OPT(opt, xt_family(par), info->match_set.dim, 197 ADT_OPT(opt, xt_family(par), info->match_set.dim,
235 info->match_set.flags, info->flags, UINT_MAX); 198 info->match_set.flags, info->flags, UINT_MAX,
199 info->packets.value, info->bytes.value,
200 info->packets.op, info->bytes.op);
236 201
237 if (info->packets.op != IPSET_COUNTER_NONE || 202 if (info->packets.op != IPSET_COUNTER_NONE ||
238 info->bytes.op != IPSET_COUNTER_NONE) 203 info->bytes.op != IPSET_COUNTER_NONE)
239 opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; 204 opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
240 205
241 ret = match_set(info->match_set.index, skb, par, &opt, 206 return match_set(info->match_set.index, skb, par, &opt,
242 info->match_set.flags & IPSET_INV_MATCH); 207 info->match_set.flags & IPSET_INV_MATCH);
243
244 if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
245 return ret;
246
247 if (!match_counter(opt.ext.packets, &info->packets))
248 return false;
249 return match_counter(opt.ext.bytes, &info->bytes);
250} 208}
251 209
252#define set_match_v4_checkentry set_match_v1_checkentry 210#define set_match_v4_checkentry set_match_v1_checkentry
@@ -260,9 +218,11 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
260 const struct xt_set_info_target_v0 *info = par->targinfo; 218 const struct xt_set_info_target_v0 *info = par->targinfo;
261 219
262 ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim, 220 ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
263 info->add_set.u.compat.flags, 0, UINT_MAX); 221 info->add_set.u.compat.flags, 0, UINT_MAX,
222 0, 0, 0, 0);
264 ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim, 223 ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
265 info->del_set.u.compat.flags, 0, UINT_MAX); 224 info->del_set.u.compat.flags, 0, UINT_MAX,
225 0, 0, 0, 0);
266 226
267 if (info->add_set.index != IPSET_INVALID_ID) 227 if (info->add_set.index != IPSET_INVALID_ID)
268 ip_set_add(info->add_set.index, skb, par, &add_opt); 228 ip_set_add(info->add_set.index, skb, par, &add_opt);
@@ -333,9 +293,11 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
333 const struct xt_set_info_target_v1 *info = par->targinfo; 293 const struct xt_set_info_target_v1 *info = par->targinfo;
334 294
335 ADT_OPT(add_opt, xt_family(par), info->add_set.dim, 295 ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
336 info->add_set.flags, 0, UINT_MAX); 296 info->add_set.flags, 0, UINT_MAX,
297 0, 0, 0, 0);
337 ADT_OPT(del_opt, xt_family(par), info->del_set.dim, 298 ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
338 info->del_set.flags, 0, UINT_MAX); 299 info->del_set.flags, 0, UINT_MAX,
300 0, 0, 0, 0);
339 301
340 if (info->add_set.index != IPSET_INVALID_ID) 302 if (info->add_set.index != IPSET_INVALID_ID)
341 ip_set_add(info->add_set.index, skb, par, &add_opt); 303 ip_set_add(info->add_set.index, skb, par, &add_opt);
@@ -402,9 +364,11 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
402 const struct xt_set_info_target_v2 *info = par->targinfo; 364 const struct xt_set_info_target_v2 *info = par->targinfo;
403 365
404 ADT_OPT(add_opt, xt_family(par), info->add_set.dim, 366 ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
405 info->add_set.flags, info->flags, info->timeout); 367 info->add_set.flags, info->flags, info->timeout,
368 0, 0, 0, 0);
406 ADT_OPT(del_opt, xt_family(par), info->del_set.dim, 369 ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
407 info->del_set.flags, 0, UINT_MAX); 370 info->del_set.flags, 0, UINT_MAX,
371 0, 0, 0, 0);
408 372
409 /* Normalize to fit into jiffies */ 373 /* Normalize to fit into jiffies */
410 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && 374 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
@@ -432,11 +396,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
432 int ret; 396 int ret;
433 397
434 ADT_OPT(add_opt, xt_family(par), info->add_set.dim, 398 ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
435 info->add_set.flags, info->flags, info->timeout); 399 info->add_set.flags, info->flags, info->timeout,
400 0, 0, 0, 0);
436 ADT_OPT(del_opt, xt_family(par), info->del_set.dim, 401 ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
437 info->del_set.flags, 0, UINT_MAX); 402 info->del_set.flags, 0, UINT_MAX,
403 0, 0, 0, 0);
438 ADT_OPT(map_opt, xt_family(par), info->map_set.dim, 404 ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
439 info->map_set.flags, 0, UINT_MAX); 405 info->map_set.flags, 0, UINT_MAX,
406 0, 0, 0, 0);
440 407
441 /* Normalize to fit into jiffies */ 408 /* Normalize to fit into jiffies */
442 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && 409 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
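Counter matching moves out of xt_set and into the ip_set core, which now receives the operators and reference values through the extended ADT_OPT fields. The removed match_counter() logic is reproduced below as a standalone demo so the packets_op/bytes_op semantics stay visible; the enum values are local stand-ins for the IPSET_COUNTER_* constants:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

enum { COUNTER_NONE, COUNTER_EQ, COUNTER_NE, COUNTER_LT, COUNTER_GT };

static bool match_counter(uint64_t counter, int op, uint64_t value)
{
	switch (op) {
	case COUNTER_NONE: return true;            /* no constraint */
	case COUNTER_EQ:   return counter == value;
	case COUNTER_NE:   return counter != value;
	case COUNTER_LT:   return counter < value;
	case COUNTER_GT:   return counter > value;
	}
	return false;
}

int main(void)
{
	printf("%d\n", match_counter(10, COUNTER_GT, 5)); /* 1: 10 > 5 */
	printf("%d\n", match_counter(10, COUNTER_LT, 5)); /* 0: 10 is not < 5 */
	return 0;
}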
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 11de55e7a868..8710fdba2ae2 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -84,6 +84,7 @@ static struct xt_match xt_statistic_mt_reg __read_mostly = {
84 .checkentry = statistic_mt_check, 84 .checkentry = statistic_mt_check,
85 .destroy = statistic_mt_destroy, 85 .destroy = statistic_mt_destroy,
86 .matchsize = sizeof(struct xt_statistic_info), 86 .matchsize = sizeof(struct xt_statistic_info),
87 .usersize = offsetof(struct xt_statistic_info, master),
87 .me = THIS_MODULE, 88 .me = THIS_MODULE,
88}; 89};
89 90
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 84a4e4c3be4b..2ad445c1d27c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -65,6 +65,7 @@
65#include <linux/net_namespace.h> 65#include <linux/net_namespace.h>
66 66
67#include <net/net_namespace.h> 67#include <net/net_namespace.h>
68#include <net/netns/generic.h>
68#include <net/sock.h> 69#include <net/sock.h>
69#include <net/scm.h> 70#include <net/scm.h>
70#include <net/netlink.h> 71#include <net/netlink.h>
@@ -145,8 +146,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
145 146
146static BLOCKING_NOTIFIER_HEAD(netlink_chain); 147static BLOCKING_NOTIFIER_HEAD(netlink_chain);
147 148
148static DEFINE_SPINLOCK(netlink_tap_lock);
149static struct list_head netlink_tap_all __read_mostly;
150 149
151static const struct rhashtable_params netlink_rhashtable_params; 150static const struct rhashtable_params netlink_rhashtable_params;
152 151
@@ -173,14 +172,24 @@ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
173 return new; 172 return new;
174} 173}
175 174
175static unsigned int netlink_tap_net_id;
176
177struct netlink_tap_net {
178 struct list_head netlink_tap_all;
179 struct mutex netlink_tap_lock;
180};
181
176int netlink_add_tap(struct netlink_tap *nt) 182int netlink_add_tap(struct netlink_tap *nt)
177{ 183{
184 struct net *net = dev_net(nt->dev);
185 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
186
178 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 187 if (unlikely(nt->dev->type != ARPHRD_NETLINK))
179 return -EINVAL; 188 return -EINVAL;
180 189
181 spin_lock(&netlink_tap_lock); 190 mutex_lock(&nn->netlink_tap_lock);
182 list_add_rcu(&nt->list, &netlink_tap_all); 191 list_add_rcu(&nt->list, &nn->netlink_tap_all);
183 spin_unlock(&netlink_tap_lock); 192 mutex_unlock(&nn->netlink_tap_lock);
184 193
185 __module_get(nt->module); 194 __module_get(nt->module);
186 195
@@ -190,12 +199,14 @@ EXPORT_SYMBOL_GPL(netlink_add_tap);
190 199
191static int __netlink_remove_tap(struct netlink_tap *nt) 200static int __netlink_remove_tap(struct netlink_tap *nt)
192{ 201{
202 struct net *net = dev_net(nt->dev);
203 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
193 bool found = false; 204 bool found = false;
194 struct netlink_tap *tmp; 205 struct netlink_tap *tmp;
195 206
196 spin_lock(&netlink_tap_lock); 207 mutex_lock(&nn->netlink_tap_lock);
197 208
198 list_for_each_entry(tmp, &netlink_tap_all, list) { 209 list_for_each_entry(tmp, &nn->netlink_tap_all, list) {
199 if (nt == tmp) { 210 if (nt == tmp) {
200 list_del_rcu(&nt->list); 211 list_del_rcu(&nt->list);
201 found = true; 212 found = true;
@@ -205,7 +216,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
205 216
206 pr_warn("__netlink_remove_tap: %p not found\n", nt); 217 pr_warn("__netlink_remove_tap: %p not found\n", nt);
207out: 218out:
208 spin_unlock(&netlink_tap_lock); 219 mutex_unlock(&nn->netlink_tap_lock);
209 220
210 if (found) 221 if (found)
211 module_put(nt->module); 222 module_put(nt->module);
@@ -224,6 +235,26 @@ int netlink_remove_tap(struct netlink_tap *nt)
224} 235}
225EXPORT_SYMBOL_GPL(netlink_remove_tap); 236EXPORT_SYMBOL_GPL(netlink_remove_tap);
226 237
238static __net_init int netlink_tap_init_net(struct net *net)
239{
240 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
241
242 INIT_LIST_HEAD(&nn->netlink_tap_all);
243 mutex_init(&nn->netlink_tap_lock);
244 return 0;
245}
246
247static void __net_exit netlink_tap_exit_net(struct net *net)
248{
249}
250
251static struct pernet_operations netlink_tap_net_ops = {
252 .init = netlink_tap_init_net,
253 .exit = netlink_tap_exit_net,
254 .id = &netlink_tap_net_id,
255 .size = sizeof(struct netlink_tap_net),
256};
257
227static bool netlink_filter_tap(const struct sk_buff *skb) 258static bool netlink_filter_tap(const struct sk_buff *skb)
228{ 259{
229 struct sock *sk = skb->sk; 260 struct sock *sk = skb->sk;
@@ -277,7 +308,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
277 return ret; 308 return ret;
278} 309}
279 310
280static void __netlink_deliver_tap(struct sk_buff *skb) 311static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn)
281{ 312{
282 int ret; 313 int ret;
283 struct netlink_tap *tmp; 314 struct netlink_tap *tmp;
@@ -285,19 +316,21 @@ static void __netlink_deliver_tap(struct sk_buff *skb)
285 if (!netlink_filter_tap(skb)) 316 if (!netlink_filter_tap(skb))
286 return; 317 return;
287 318
288 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { 319 list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) {
289 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 320 ret = __netlink_deliver_tap_skb(skb, tmp->dev);
290 if (unlikely(ret)) 321 if (unlikely(ret))
291 break; 322 break;
292 } 323 }
293} 324}
294 325
295static void netlink_deliver_tap(struct sk_buff *skb) 326static void netlink_deliver_tap(struct net *net, struct sk_buff *skb)
296{ 327{
328 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
329
297 rcu_read_lock(); 330 rcu_read_lock();
298 331
299 if (unlikely(!list_empty(&netlink_tap_all))) 332 if (unlikely(!list_empty(&nn->netlink_tap_all)))
300 __netlink_deliver_tap(skb); 333 __netlink_deliver_tap(skb, nn);
301 334
302 rcu_read_unlock(); 335 rcu_read_unlock();
303} 336}
@@ -306,7 +339,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
306 struct sk_buff *skb) 339 struct sk_buff *skb)
307{ 340{
308 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) 341 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
309 netlink_deliver_tap(skb); 342 netlink_deliver_tap(sock_net(dst), skb);
310} 343}
311 344
312static void netlink_overrun(struct sock *sk) 345static void netlink_overrun(struct sock *sk)
@@ -1216,7 +1249,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1216{ 1249{
1217 int len = skb->len; 1250 int len = skb->len;
1218 1251
1219 netlink_deliver_tap(skb); 1252 netlink_deliver_tap(sock_net(sk), skb);
1220 1253
1221 skb_queue_tail(&sk->sk_receive_queue, skb); 1254 skb_queue_tail(&sk->sk_receive_queue, skb);
1222 sk->sk_data_ready(sk); 1255 sk->sk_data_ready(sk);
@@ -2482,8 +2515,9 @@ static int netlink_walk_start(struct nl_seq_iter *iter)
2482 return err; 2515 return err;
2483 } 2516 }
2484 2517
2485 err = rhashtable_walk_start(&iter->hti); 2518 rhashtable_walk_start(&iter->hti);
2486 return err == -EAGAIN ? 0 : err; 2519
2520 return 0;
2487} 2521}
2488 2522
2489static void netlink_walk_stop(struct nl_seq_iter *iter) 2523static void netlink_walk_stop(struct nl_seq_iter *iter)
@@ -2604,7 +2638,6 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
2604} 2638}
2605 2639
2606static const struct file_operations netlink_seq_fops = { 2640static const struct file_operations netlink_seq_fops = {
2607 .owner = THIS_MODULE,
2608 .open = netlink_seq_open, 2641 .open = netlink_seq_open,
2609 .read = seq_read, 2642 .read = seq_read,
2610 .llseek = seq_lseek, 2643 .llseek = seq_lseek,
@@ -2734,12 +2767,11 @@ static int __init netlink_proto_init(void)
2734 } 2767 }
2735 } 2768 }
2736 2769
2737 INIT_LIST_HEAD(&netlink_tap_all);
2738
2739 netlink_add_usersock_entry(); 2770 netlink_add_usersock_entry();
2740 2771
2741 sock_register(&netlink_family_ops); 2772 sock_register(&netlink_family_ops);
2742 register_pernet_subsys(&netlink_net_ops); 2773 register_pernet_subsys(&netlink_net_ops);
2774 register_pernet_subsys(&netlink_tap_net_ops);
2743 /* The netlink device handler may be needed early. */ 2775 /* The netlink device handler may be needed early. */
2744 rtnetlink_init(); 2776 rtnetlink_init();
2745out: 2777out:
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 8faa20b4d457..7dda33b9b784 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -115,11 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
115 if (!s_num) 115 if (!s_num)
116 rhashtable_walk_enter(&tbl->hash, hti); 116 rhashtable_walk_enter(&tbl->hash, hti);
117 117
118 ret = rhashtable_walk_start(hti); 118 rhashtable_walk_start(hti);
119 if (ret == -EAGAIN)
120 ret = 0;
121 if (ret)
122 goto stop;
123 119
124 while ((nlsk = rhashtable_walk_next(hti))) { 120 while ((nlsk = rhashtable_walk_next(hti))) {
125 if (IS_ERR(nlsk)) { 121 if (IS_ERR(nlsk)) {
@@ -146,8 +142,8 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
146 } 142 }
147 } 143 }
148 144
149stop:
150 rhashtable_walk_stop(hti); 145 rhashtable_walk_stop(hti);
146
151 if (ret) 147 if (ret)
152 goto done; 148 goto done;
153 149
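This hunk and the netlink_walk_start() change above both track the rhashtable API update: rhashtable_walk_start() no longer returns an error, and a concurrent table resize is instead reported as ERR_PTR(-EAGAIN) by rhashtable_walk_next(), after which the iterator is resynced and the walk may simply continue. The resulting idiom, sketched as a hypothetical demo_walk():

    #include <linux/err.h>
    #include <linux/rhashtable.h>

    static void demo_walk(struct rhashtable *ht)
    {
            struct rhashtable_iter hti;
            void *obj;

            rhashtable_walk_enter(ht, &hti);
            rhashtable_walk_start(&hti);    /* void: no error to check */

            while ((obj = rhashtable_walk_next(&hti))) {
                    if (IS_ERR(obj)) {
                            if (PTR_ERR(obj) == -EAGAIN)
                                    continue;   /* resize; iterator resynced */
                            break;
                    }
                    /* process obj */
            }

            rhashtable_walk_stop(&hti);
            rhashtable_walk_exit(&hti);
    }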
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d444daf1ac04..6f02499ef007 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1081,6 +1081,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
1081{ 1081{
1082 struct sk_buff *tmp; 1082 struct sk_buff *tmp;
1083 struct net *net, *prev = NULL; 1083 struct net *net, *prev = NULL;
1084 bool delivered = false;
1084 int err; 1085 int err;
1085 1086
1086 for_each_net_rcu(net) { 1087 for_each_net_rcu(net) {
@@ -1092,14 +1093,21 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
1092 } 1093 }
1093 err = nlmsg_multicast(prev->genl_sock, tmp, 1094 err = nlmsg_multicast(prev->genl_sock, tmp,
1094 portid, group, flags); 1095 portid, group, flags);
1095 if (err) 1096 if (!err)
1097 delivered = true;
1098 else if (err != -ESRCH)
1096 goto error; 1099 goto error;
1097 } 1100 }
1098 1101
1099 prev = net; 1102 prev = net;
1100 } 1103 }
1101 1104
1102 return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); 1105 err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
1106 if (!err)
1107 delivered = true;
1108 else if (err != -ESRCH)
1109 goto error;
1110 return delivered ? 0 : -ESRCH;
1103 error: 1111 error:
1104 kfree_skb(skb); 1112 kfree_skb(skb);
1105 return err; 1113 return err;
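The genlmsg_mcast() fix changes the aggregation semantics: an -ESRCH (no listener in that particular namespace) from one delivery no longer masks a success in another, while any other error still aborts and frees the skb. A runnable userspace model of just the return-value logic (mcast_all() is illustrative, not a kernel function):

    #include <errno.h>
    #include <stdio.h>

    static int mcast_all(const int *errs, int n)
    {
            int delivered = 0, i;

            for (i = 0; i < n; i++) {
                    if (errs[i] == 0)
                            delivered = 1;
                    else if (errs[i] != -ESRCH)
                            return errs[i];     /* hard error: abort */
            }
            return delivered ? 0 : -ESRCH;
    }

    int main(void)
    {
            int one_listener[] = { -ESRCH, 0, -ESRCH };
            int no_listeners[] = { -ESRCH, -ESRCH };

            /* prints 0 and -ESRCH (-3 on Linux) */
            printf("%d %d\n", mcast_all(one_listener, 3),
                   mcast_all(no_listeners, 2));
            return 0;
    }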
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 7ed9d4422a73..9ba30c63be3d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1344,7 +1344,6 @@ static int nr_info_open(struct inode *inode, struct file *file)
1344} 1344}
1345 1345
1346static const struct file_operations nr_info_fops = { 1346static const struct file_operations nr_info_fops = {
1347 .owner = THIS_MODULE,
1348 .open = nr_info_open, 1347 .open = nr_info_open,
1349 .read = seq_read, 1348 .read = seq_read,
1350 .llseek = seq_lseek, 1349 .llseek = seq_lseek,
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 75e6ba970fde..b5a7dcb30991 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -901,7 +901,6 @@ static int nr_node_info_open(struct inode *inode, struct file *file)
901} 901}
902 902
903const struct file_operations nr_nodes_fops = { 903const struct file_operations nr_nodes_fops = {
904 .owner = THIS_MODULE,
905 .open = nr_node_info_open, 904 .open = nr_node_info_open,
906 .read = seq_read, 905 .read = seq_read,
907 .llseek = seq_lseek, 906 .llseek = seq_lseek,
@@ -968,7 +967,6 @@ static int nr_neigh_info_open(struct inode *inode, struct file *file)
968} 967}
969 968
970const struct file_operations nr_neigh_fops = { 969const struct file_operations nr_neigh_fops = {
971 .owner = THIS_MODULE,
972 .open = nr_neigh_info_open, 970 .open = nr_neigh_info_open,
973 .read = seq_read, 971 .read = seq_read,
974 .llseek = seq_lseek, 972 .llseek = seq_lseek,
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index fb7afcaa3004..376040092142 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -531,7 +531,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
531 return 0; 531 return 0;
532} 532}
533 533
534static inline unsigned int llcp_accept_poll(struct sock *parent) 534static inline __poll_t llcp_accept_poll(struct sock *parent)
535{ 535{
536 struct nfc_llcp_sock *llcp_sock, *parent_sock; 536 struct nfc_llcp_sock *llcp_sock, *parent_sock;
537 struct sock *sk; 537 struct sock *sk;
@@ -543,17 +543,17 @@ static inline unsigned int llcp_accept_poll(struct sock *parent)
543 sk = &llcp_sock->sk; 543 sk = &llcp_sock->sk;
544 544
545 if (sk->sk_state == LLCP_CONNECTED) 545 if (sk->sk_state == LLCP_CONNECTED)
546 return POLLIN | POLLRDNORM; 546 return EPOLLIN | EPOLLRDNORM;
547 } 547 }
548 548
549 return 0; 549 return 0;
550} 550}
551 551
552static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, 552static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
553 poll_table *wait) 553 poll_table *wait)
554{ 554{
555 struct sock *sk = sock->sk; 555 struct sock *sk = sock->sk;
556 unsigned int mask = 0; 556 __poll_t mask = 0;
557 557
558 pr_debug("%p\n", sk); 558 pr_debug("%p\n", sk);
559 559
@@ -563,23 +563,23 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
563 return llcp_accept_poll(sk); 563 return llcp_accept_poll(sk);
564 564
565 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 565 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
566 mask |= POLLERR | 566 mask |= EPOLLERR |
567 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); 567 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
568 568
569 if (!skb_queue_empty(&sk->sk_receive_queue)) 569 if (!skb_queue_empty(&sk->sk_receive_queue))
570 mask |= POLLIN | POLLRDNORM; 570 mask |= EPOLLIN | EPOLLRDNORM;
571 571
572 if (sk->sk_state == LLCP_CLOSED) 572 if (sk->sk_state == LLCP_CLOSED)
573 mask |= POLLHUP; 573 mask |= EPOLLHUP;
574 574
575 if (sk->sk_shutdown & RCV_SHUTDOWN) 575 if (sk->sk_shutdown & RCV_SHUTDOWN)
576 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 576 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
577 577
578 if (sk->sk_shutdown == SHUTDOWN_MASK) 578 if (sk->sk_shutdown == SHUTDOWN_MASK)
579 mask |= POLLHUP; 579 mask |= EPOLLHUP;
580 580
581 if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) 581 if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
582 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 582 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
583 else 583 else
584 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 584 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
585 585
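The llcp changes belong to the tree-wide __poll_t conversion: poll handlers now return the sparse-checked __poll_t type built from EPOLL* constants instead of a plain unsigned int of POLL* values, so mixing user- and kernel-space poll flags becomes a static warning. A minimal sketch of the converted handler shape (demo_poll() is hypothetical):

    #include <linux/poll.h>
    #include <net/sock.h>

    static __poll_t demo_poll(struct file *file, struct socket *sock,
                              poll_table *wait)
    {
            struct sock *sk = sock->sk;
            __poll_t mask = 0;

            poll_wait(file, sk_sleep(sk), wait);

            if (!skb_queue_empty(&sk->sk_receive_queue))
                    mask |= EPOLLIN | EPOLLRDNORM;
            if (sk->sk_shutdown == SHUTDOWN_MASK)
                    mask |= EPOLLHUP;

            return mask;
    }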
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 8d104c1db628..a66f102c6c01 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -305,7 +305,7 @@ static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
305 return 0; 305 return 0;
306} 306}
307 307
308static unsigned int nci_uart_tty_poll(struct tty_struct *tty, 308static __poll_t nci_uart_tty_poll(struct tty_struct *tty,
309 struct file *filp, poll_table *wait) 309 struct file *filp, poll_table *wait)
310{ 310{
311 return 0; 311 return 0;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index b27c5c6d9cab..c5904f629091 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1098,6 +1098,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
1098 return 0; 1098 return 0;
1099} 1099}
1100 1100
1101/* Trim the skb to the length specified by the IP/IPv6 header,
1102 * removing any trailing lower-layer padding. This prepares the skb
1103 * for higher-layer processing that assumes skb->len excludes padding
1104 * (such as nf_ip_checksum). The caller needs to pull the skb to the
1105 * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
1106 */
1107static int ovs_skb_network_trim(struct sk_buff *skb)
1108{
1109 unsigned int len;
1110 int err;
1111
1112 switch (skb->protocol) {
1113 case htons(ETH_P_IP):
1114 len = ntohs(ip_hdr(skb)->tot_len);
1115 break;
1116 case htons(ETH_P_IPV6):
1117 len = sizeof(struct ipv6hdr)
1118 + ntohs(ipv6_hdr(skb)->payload_len);
1119 break;
1120 default:
1121 len = skb->len;
1122 }
1123
1124 err = pskb_trim_rcsum(skb, len);
1125 if (err)
1126 kfree_skb(skb);
1127
1128 return err;
1129}
1130
1101/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 1131/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
1102 * value if 'skb' is freed. 1132 * value if 'skb' is freed.
1103 */ 1133 */
@@ -1112,6 +1142,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
1112 nh_ofs = skb_network_offset(skb); 1142 nh_ofs = skb_network_offset(skb);
1113 skb_pull_rcsum(skb, nh_ofs); 1143 skb_pull_rcsum(skb, nh_ofs);
1114 1144
1145 err = ovs_skb_network_trim(skb);
1146 if (err)
1147 return err;
1148
1115 if (key->ip.frag != OVS_FRAG_TYPE_NONE) { 1149 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
1116 err = handle_fragments(net, key, info->zone.id, skb); 1150 err = handle_fragments(net, key, info->zone.id, skb);
1117 if (err) 1151 if (err)
@@ -1266,14 +1300,14 @@ static int parse_nat(const struct nlattr *attr,
1266 /* Do not allow flags if no type is given. */ 1300 /* Do not allow flags if no type is given. */
1267 if (info->range.flags) { 1301 if (info->range.flags) {
1268 OVS_NLERR(log, 1302 OVS_NLERR(log,
1269 "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" 1303 "NAT flags may be given only when NAT range (SRC or DST) is also specified."
1270 ); 1304 );
1271 return -EINVAL; 1305 return -EINVAL;
1272 } 1306 }
1273 info->nat = OVS_CT_NAT; /* NAT existing connections. */ 1307 info->nat = OVS_CT_NAT; /* NAT existing connections. */
1274 } else if (!info->commit) { 1308 } else if (!info->commit) {
1275 OVS_NLERR(log, 1309 OVS_NLERR(log,
1276 "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" 1310 "NAT attributes may be specified only when CT COMMIT flag is also specified."
1277 ); 1311 );
1278 return -EINVAL; 1312 return -EINVAL;
1279 } 1313 }
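ovs_skb_network_trim() exists because link-layer padding survives into the skb: a minimum-size Ethernet frame carrying a short IP packet arrives with trailing pad bytes that the IP header's length field does not cover, and checksum helpers such as nf_ip_checksum would otherwise include them. A runnable userspace illustration of the length math (the values are hypothetical):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int skb_len = 46;        /* L3 bytes received, incl. pad */
            uint16_t tot_len_be = htons(28);  /* length field from IP header */
            unsigned int ip_len = ntohs(tot_len_be);

            if (skb_len > ip_len)   /* prints: trim 18 trailing pad bytes */
                    printf("trim %u trailing pad bytes\n", skb_len - ip_len);
            return 0;
    }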
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index f039064ce922..56b8e7167790 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -56,12 +56,12 @@
56 56
57u64 ovs_flow_used_time(unsigned long flow_jiffies) 57u64 ovs_flow_used_time(unsigned long flow_jiffies)
58{ 58{
59 struct timespec cur_ts; 59 struct timespec64 cur_ts;
60 u64 cur_ms, idle_ms; 60 u64 cur_ms, idle_ms;
61 61
62 ktime_get_ts(&cur_ts); 62 ktime_get_ts64(&cur_ts);
63 idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); 63 idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
64 cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + 64 cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
65 cur_ts.tv_nsec / NSEC_PER_MSEC; 65 cur_ts.tv_nsec / NSEC_PER_MSEC;
66 66
67 return cur_ms - idle_ms; 67 return cur_ms - idle_ms;
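The timespec64/ktime_get_ts64() switch is a y2038 cleanup: timespec's 32-bit tv_sec overflows in 2038 on 32-bit ABIs, while the millisecond delta arithmetic itself is unchanged. The same second/millisecond computation in userspace, with CLOCK_MONOTONIC standing in for the kernel's monotonic clock:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    #define MSEC_PER_SEC  1000ULL
    #define NSEC_PER_MSEC 1000000ULL

    int main(void)
    {
            struct timespec ts;     /* 64-bit tv_sec on modern ABIs */
            uint64_t ms;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            ms = (uint64_t)ts.tv_sec * MSEC_PER_SEC +
                 (uint64_t)ts.tv_nsec / NSEC_PER_MSEC;
            printf("%llu ms\n", (unsigned long long)ms);
            return 0;
    }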
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f143908b651d..7322aa1e382e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,6 +49,7 @@
49#include <net/mpls.h> 49#include <net/mpls.h>
50#include <net/vxlan.h> 50#include <net/vxlan.h>
51#include <net/tun_proto.h> 51#include <net/tun_proto.h>
52#include <net/erspan.h>
52 53
53#include "flow_netlink.h" 54#include "flow_netlink.h"
54 55
@@ -329,7 +330,8 @@ size_t ovs_tun_key_attr_size(void)
329 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 330 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
330 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 331 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
331 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 332 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
332 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 333 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
334 * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
333 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 335 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
334 */ 336 */
335 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 337 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
@@ -400,6 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
400 .next = ovs_vxlan_ext_key_lens }, 402 .next = ovs_vxlan_ext_key_lens },
401 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 403 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
402 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 404 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
405 [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE },
403}; 406};
404 407
405static const struct ovs_len_tbl 408static const struct ovs_len_tbl
@@ -631,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
631 return 0; 634 return 0;
632} 635}
633 636
637static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
638 struct sw_flow_match *match, bool is_mask,
639 bool log)
640{
641 unsigned long opt_key_offset;
642
643 BUILD_BUG_ON(sizeof(struct erspan_metadata) >
644 sizeof(match->key->tun_opts));
645
646 if (nla_len(a) > sizeof(match->key->tun_opts)) {
647 OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
648 nla_len(a), sizeof(match->key->tun_opts));
649 return -EINVAL;
650 }
651
652 if (!is_mask)
653 SW_FLOW_KEY_PUT(match, tun_opts_len,
654 sizeof(struct erspan_metadata), false);
655 else
656 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
657
658 opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
659 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
660 nla_len(a), is_mask);
661 return 0;
662}
663
634static int ip_tun_from_nlattr(const struct nlattr *attr, 664static int ip_tun_from_nlattr(const struct nlattr *attr,
635 struct sw_flow_match *match, bool is_mask, 665 struct sw_flow_match *match, bool is_mask,
636 bool log) 666 bool log)
@@ -738,6 +768,20 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
738 break; 768 break;
739 case OVS_TUNNEL_KEY_ATTR_PAD: 769 case OVS_TUNNEL_KEY_ATTR_PAD:
740 break; 770 break;
771 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772 if (opts_type) {
773 OVS_NLERR(log, "Multiple metadata blocks provided");
774 return -EINVAL;
775 }
776
777 err = erspan_tun_opt_from_nlattr(a, match, is_mask,
778 log);
779 if (err)
780 return err;
781
782 tun_flags |= TUNNEL_ERSPAN_OPT;
783 opts_type = type;
784 break;
741 default: 785 default:
742 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 786 OVS_NLERR(log, "Unknown IP tunnel attribute %d",
743 type); 787 type);
@@ -862,6 +906,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
862 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 906 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
863 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 907 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
864 return -EMSGSIZE; 908 return -EMSGSIZE;
909 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
910 nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
911 swkey_tun_opts_len, tun_opts))
912 return -EMSGSIZE;
865 } 913 }
866 914
867 return 0; 915 return 0;
@@ -2454,7 +2502,7 @@ static int validate_geneve_opts(struct sw_flow_key *key)
2454 2502
2455 option = (struct geneve_opt *)((u8 *)option + len); 2503 option = (struct geneve_opt *)((u8 *)option + len);
2456 opts_len -= len; 2504 opts_len -= len;
2457 }; 2505 }
2458 2506
2459 key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 2507 key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2460 2508
@@ -2486,8 +2534,10 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
2486 break; 2534 break;
2487 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2535 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2488 break; 2536 break;
2537 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2538 break;
2489 } 2539 }
2490 }; 2540 }
2491 2541
2492 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 2542 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2493 if (start < 0) 2543 if (start < 0)
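The new OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS attribute is variable-length (OVS_ATTR_VARIABLE), so erspan_tun_opt_from_nlattr() must bound-check it against the flow key's tun_opts storage before copying. The check in isolation, with demo_* stand-ins for the patch's own buffers:

    #include <net/netlink.h>

    static int demo_copy_opts(const struct nlattr *a, u8 *demo_opts,
                              size_t demo_opts_len)
    {
            if (nla_len(a) > demo_opts_len)
                    return -EINVAL;         /* attribute larger than storage */

            memcpy(demo_opts, nla_data(a), nla_len(a));
            return 0;
    }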
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 3fbfc78991ac..04b94281a30b 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -488,7 +488,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
488 long long int max_bucket_size; 488 long long int max_bucket_size;
489 489
490 band = &meter->bands[i]; 490 band = &meter->bands[i];
491 max_bucket_size = (band->burst_size + band->rate) * 1000; 491 max_bucket_size = (band->burst_size + band->rate) * 1000LL;
492 492
493 band->bucket += delta_ms * band->rate; 493 band->bucket += delta_ms * band->rate;
494 if (band->bucket > max_bucket_size) 494 if (band->bucket > max_bucket_size)
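The 1000LL suffix matters because the band's burst_size and rate are 32-bit values: without it the addition and multiply happen in 32-bit arithmetic and wrap before the result is widened for the 64-bit assignment. A runnable demonstration with hypothetical values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t burst_size = 3000000, rate = 2000000;

            /* 32-bit multiply wraps mod 2^32 before widening ... */
            long long bad  = (burst_size + rate) * 1000;
            /* ... while the LL suffix forces 64-bit arithmetic throughout */
            long long good = (burst_size + rate) * 1000LL;

            /* typically prints: bad=705032704 good=5000000000 */
            printf("bad=%lld good=%lld\n", bad, good);
            return 0;
    }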
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 04a3128adcf0..bb95c43aae76 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -16,7 +16,6 @@
16 * 02110-1301, USA 16 * 02110-1301, USA
17 */ 17 */
18 18
19#include <linux/hardirq.h>
20#include <linux/if_vlan.h> 19#include <linux/if_vlan.h>
21#include <linux/kernel.h> 20#include <linux/kernel.h>
22#include <linux/netdevice.h> 21#include <linux/netdevice.h>
@@ -126,18 +125,12 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
126 } 125 }
127} 126}
128 127
129static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
130{
131 dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
132}
133
134static const struct net_device_ops internal_dev_netdev_ops = { 128static const struct net_device_ops internal_dev_netdev_ops = {
135 .ndo_open = internal_dev_open, 129 .ndo_open = internal_dev_open,
136 .ndo_stop = internal_dev_stop, 130 .ndo_stop = internal_dev_stop,
137 .ndo_start_xmit = internal_dev_xmit, 131 .ndo_start_xmit = internal_dev_xmit,
138 .ndo_set_mac_address = eth_mac_addr, 132 .ndo_set_mac_address = eth_mac_addr,
139 .ndo_get_stats64 = internal_get_stats, 133 .ndo_get_stats64 = internal_get_stats,
140 .ndo_set_rx_headroom = internal_set_rx_headroom,
141}; 134};
142 135
143static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { 136static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -154,7 +147,7 @@ static void do_setup(struct net_device *netdev)
154 147
155 netdev->priv_flags &= ~IFF_TX_SKB_SHARING; 148 netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
156 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | 149 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
157 IFF_PHONY_HEADROOM | IFF_NO_QUEUE; 150 IFF_NO_QUEUE;
158 netdev->needs_free_netdev = true; 151 netdev->needs_free_netdev = true;
159 netdev->priv_destructor = internal_dev_destructor; 152 netdev->priv_destructor = internal_dev_destructor;
160 netdev->ethtool_ops = &internal_dev_ethtool_ops; 153 netdev->ethtool_ops = &internal_dev_ethtool_ops;
@@ -195,7 +188,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
195 err = -ENOMEM; 188 err = -ENOMEM;
196 goto error_free_netdev; 189 goto error_free_netdev;
197 } 190 }
198 vport->dev->needed_headroom = vport->dp->max_headroom;
199 191
200 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); 192 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
201 internal_dev = internal_dev_priv(vport->dev); 193 internal_dev = internal_dev_priv(vport->dev);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da215e5c1399..e0f3f4aeeb4f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -247,12 +247,13 @@ static int packet_direct_xmit(struct sk_buff *skb)
247 struct sk_buff *orig_skb = skb; 247 struct sk_buff *orig_skb = skb;
248 struct netdev_queue *txq; 248 struct netdev_queue *txq;
249 int ret = NETDEV_TX_BUSY; 249 int ret = NETDEV_TX_BUSY;
250 bool again = false;
250 251
251 if (unlikely(!netif_running(dev) || 252 if (unlikely(!netif_running(dev) ||
252 !netif_carrier_ok(dev))) 253 !netif_carrier_ok(dev)))
253 goto drop; 254 goto drop;
254 255
255 skb = validate_xmit_skb_list(skb, dev); 256 skb = validate_xmit_skb_list(skb, dev, &again);
256 if (skb != orig_skb) 257 if (skb != orig_skb)
257 goto drop; 258 goto drop;
258 259
@@ -4073,18 +4074,18 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
4073 return 0; 4074 return 0;
4074} 4075}
4075 4076
4076static unsigned int packet_poll(struct file *file, struct socket *sock, 4077static __poll_t packet_poll(struct file *file, struct socket *sock,
4077 poll_table *wait) 4078 poll_table *wait)
4078{ 4079{
4079 struct sock *sk = sock->sk; 4080 struct sock *sk = sock->sk;
4080 struct packet_sock *po = pkt_sk(sk); 4081 struct packet_sock *po = pkt_sk(sk);
4081 unsigned int mask = datagram_poll(file, sock, wait); 4082 __poll_t mask = datagram_poll(file, sock, wait);
4082 4083
4083 spin_lock_bh(&sk->sk_receive_queue.lock); 4084 spin_lock_bh(&sk->sk_receive_queue.lock);
4084 if (po->rx_ring.pg_vec) { 4085 if (po->rx_ring.pg_vec) {
4085 if (!packet_previous_rx_frame(po, &po->rx_ring, 4086 if (!packet_previous_rx_frame(po, &po->rx_ring,
4086 TP_STATUS_KERNEL)) 4087 TP_STATUS_KERNEL))
4087 mask |= POLLIN | POLLRDNORM; 4088 mask |= EPOLLIN | EPOLLRDNORM;
4088 } 4089 }
4089 if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) 4090 if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
4090 po->pressure = 0; 4091 po->pressure = 0;
@@ -4092,7 +4093,7 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
4092 spin_lock_bh(&sk->sk_write_queue.lock); 4093 spin_lock_bh(&sk->sk_write_queue.lock);
4093 if (po->tx_ring.pg_vec) { 4094 if (po->tx_ring.pg_vec) {
4094 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) 4095 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4095 mask |= POLLOUT | POLLWRNORM; 4096 mask |= EPOLLOUT | EPOLLWRNORM;
4096 } 4097 }
4097 spin_unlock_bh(&sk->sk_write_queue.lock); 4098 spin_unlock_bh(&sk->sk_write_queue.lock);
4098 return mask; 4099 return mask;
@@ -4530,7 +4531,6 @@ static int packet_seq_open(struct inode *inode, struct file *file)
4530} 4531}
4531 4532
4532static const struct file_operations packet_seq_fops = { 4533static const struct file_operations packet_seq_fops = {
4533 .owner = THIS_MODULE,
4534 .open = packet_seq_open, 4534 .open = packet_seq_open,
4535 .read = seq_read, 4535 .read = seq_read,
4536 .llseek = seq_lseek, 4536 .llseek = seq_lseek,
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index da754fc926e7..871eaf2cb85e 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -299,16 +299,21 @@ out:
299 299
300int __init phonet_netlink_register(void) 300int __init phonet_netlink_register(void)
301{ 301{
302 int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, 302 int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR,
303 NULL, 0); 303 addr_doit, NULL, 0);
304 if (err) 304 if (err)
305 return err; 305 return err;
306 306
307 /* Further __rtnl_register() cannot fail */ 307 /* Further rtnl_register_module() cannot fail */
308 __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0); 308 rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR,
309 __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0); 309 addr_doit, NULL, 0);
310 __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0); 310 rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETADDR,
311 __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0); 311 NULL, getaddr_dumpit, 0);
312 __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0); 312 rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE,
313 route_doit, NULL, 0);
314 rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
315 route_doit, NULL, 0);
316 rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
317 NULL, route_dumpit, 0);
313 return 0; 318 return 0;
314} 319}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1b050dd17393..fffcd69f63ff 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -341,28 +341,28 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
341 return 0; 341 return 0;
342} 342}
343 343
344static unsigned int pn_socket_poll(struct file *file, struct socket *sock, 344static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
345 poll_table *wait) 345 poll_table *wait)
346{ 346{
347 struct sock *sk = sock->sk; 347 struct sock *sk = sock->sk;
348 struct pep_sock *pn = pep_sk(sk); 348 struct pep_sock *pn = pep_sk(sk);
349 unsigned int mask = 0; 349 __poll_t mask = 0;
350 350
351 poll_wait(file, sk_sleep(sk), wait); 351 poll_wait(file, sk_sleep(sk), wait);
352 352
353 if (sk->sk_state == TCP_CLOSE) 353 if (sk->sk_state == TCP_CLOSE)
354 return POLLERR; 354 return EPOLLERR;
355 if (!skb_queue_empty(&sk->sk_receive_queue)) 355 if (!skb_queue_empty(&sk->sk_receive_queue))
356 mask |= POLLIN | POLLRDNORM; 356 mask |= EPOLLIN | EPOLLRDNORM;
357 if (!skb_queue_empty(&pn->ctrlreq_queue)) 357 if (!skb_queue_empty(&pn->ctrlreq_queue))
358 mask |= POLLPRI; 358 mask |= EPOLLPRI;
359 if (!mask && sk->sk_state == TCP_CLOSE_WAIT) 359 if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
360 return POLLHUP; 360 return EPOLLHUP;
361 361
362 if (sk->sk_state == TCP_ESTABLISHED && 362 if (sk->sk_state == TCP_ESTABLISHED &&
363 refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && 363 refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
364 atomic_read(&pn->tx_credits)) 364 atomic_read(&pn->tx_credits))
365 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 365 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
366 366
367 return mask; 367 return mask;
368} 368}
@@ -635,7 +635,6 @@ static int pn_sock_open(struct inode *inode, struct file *file)
635} 635}
636 636
637const struct file_operations pn_sock_seq_fops = { 637const struct file_operations pn_sock_seq_fops = {
638 .owner = THIS_MODULE,
639 .open = pn_sock_open, 638 .open = pn_sock_open,
640 .read = seq_read, 639 .read = seq_read,
641 .llseek = seq_lseek, 640 .llseek = seq_lseek,
@@ -818,7 +817,6 @@ static int pn_res_open(struct inode *inode, struct file *file)
818} 817}
819 818
820const struct file_operations pn_res_seq_fops = { 819const struct file_operations pn_res_seq_fops = {
821 .owner = THIS_MODULE,
822 .open = pn_res_open, 820 .open = pn_res_open,
823 .read = seq_read, 821 .read = seq_read,
824 .llseek = seq_lseek, 822 .llseek = seq_lseek,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 77ab05e23001..5fb3929e3d7d 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1116,9 +1116,13 @@ static int __init qrtr_proto_init(void)
1116 return rc; 1116 return rc;
1117 } 1117 }
1118 1118
1119 rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); 1119 rc = rtnl_register_module(THIS_MODULE, PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
1120 if (rc) {
1121 sock_unregister(qrtr_family.family);
1122 proto_unregister(&qrtr_proto);
1123 }
1120 1124
1121 return 0; 1125 return rc;
1122} 1126}
1123postcore_initcall(qrtr_proto_init); 1127postcore_initcall(qrtr_proto_init);
1124 1128
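Both the phonet and qrtr conversions move to rtnl_register_module(), which, unlike the old __rtnl_register()/rtnl_register() calls, can fail and must be checked; qrtr therefore gains the unwind path above. The general shape of such an init path, with demo_proto, demo_family_ops, demo_addr_doit and PF_DEMO as stand-ins for the protocol's real objects:

    #include <linux/rtnetlink.h>
    #include <net/sock.h>

    static int __init demo_proto_init(void)
    {
            int rc;

            rc = proto_register(&demo_proto, 1);
            if (rc)
                    return rc;

            rc = sock_register(&demo_family_ops);
            if (rc)
                    goto err_proto;

            rc = rtnl_register_module(THIS_MODULE, PF_DEMO, RTM_NEWADDR,
                                      demo_addr_doit, NULL, 0);
            if (rc)
                    goto err_sock;

            return 0;

    err_sock:
            sock_unregister(demo_family_ops.family);
    err_proto:
            proto_unregister(&demo_proto);
            return rc;
    }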
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b405f77d664c..744c637c86b0 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -137,27 +137,27 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
137 137
138/* 138/*
139 * RDS' poll is without a doubt the least intuitive part of the interface, 139 * RDS' poll is without a doubt the least intuitive part of the interface,
140 * as POLLIN and POLLOUT do not behave entirely as you would expect from 140 * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
141 * a network protocol. 141 * a network protocol.
142 * 142 *
143 * POLLIN is asserted if 143 * EPOLLIN is asserted if
144 * - there is data on the receive queue. 144 * - there is data on the receive queue.
145 * - to signal that a previously congested destination may have become 145 * - to signal that a previously congested destination may have become
146 * uncongested 146 * uncongested
147 * - A notification has been queued to the socket (this can be a congestion 147 * - A notification has been queued to the socket (this can be a congestion
148 * update, or a RDMA completion). 148 * update, or a RDMA completion).
149 * 149 *
150 * POLLOUT is asserted if there is room on the send queue. This does not mean 150 * EPOLLOUT is asserted if there is room on the send queue. This does not mean
151 * however, that the next sendmsg() call will succeed. If the application tries 151 * however, that the next sendmsg() call will succeed. If the application tries
152 * to send to a congested destination, the system call may still fail (and 152 * to send to a congested destination, the system call may still fail (and
153 * return ENOBUFS). 153 * return ENOBUFS).
154 */ 154 */
155static unsigned int rds_poll(struct file *file, struct socket *sock, 155static __poll_t rds_poll(struct file *file, struct socket *sock,
156 poll_table *wait) 156 poll_table *wait)
157{ 157{
158 struct sock *sk = sock->sk; 158 struct sock *sk = sock->sk;
159 struct rds_sock *rs = rds_sk_to_rs(sk); 159 struct rds_sock *rs = rds_sk_to_rs(sk);
160 unsigned int mask = 0; 160 __poll_t mask = 0;
161 unsigned long flags; 161 unsigned long flags;
162 162
163 poll_wait(file, sk_sleep(sk), wait); 163 poll_wait(file, sk_sleep(sk), wait);
@@ -167,22 +167,22 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
167 167
168 read_lock_irqsave(&rs->rs_recv_lock, flags); 168 read_lock_irqsave(&rs->rs_recv_lock, flags);
169 if (!rs->rs_cong_monitor) { 169 if (!rs->rs_cong_monitor) {
170 /* When a congestion map was updated, we signal POLLIN for 170 /* When a congestion map was updated, we signal EPOLLIN for
171 * "historical" reasons. Applications can also poll for 171 * "historical" reasons. Applications can also poll for
172 * WRBAND instead. */ 172 * WRBAND instead. */
173 if (rds_cong_updated_since(&rs->rs_cong_track)) 173 if (rds_cong_updated_since(&rs->rs_cong_track))
174 mask |= (POLLIN | POLLRDNORM | POLLWRBAND); 174 mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND);
175 } else { 175 } else {
176 spin_lock(&rs->rs_lock); 176 spin_lock(&rs->rs_lock);
177 if (rs->rs_cong_notify) 177 if (rs->rs_cong_notify)
178 mask |= (POLLIN | POLLRDNORM); 178 mask |= (EPOLLIN | EPOLLRDNORM);
179 spin_unlock(&rs->rs_lock); 179 spin_unlock(&rs->rs_lock);
180 } 180 }
181 if (!list_empty(&rs->rs_recv_queue) || 181 if (!list_empty(&rs->rs_recv_queue) ||
182 !list_empty(&rs->rs_notify_queue)) 182 !list_empty(&rs->rs_notify_queue))
183 mask |= (POLLIN | POLLRDNORM); 183 mask |= (EPOLLIN | EPOLLRDNORM);
184 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) 184 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
185 mask |= (POLLOUT | POLLWRNORM); 185 mask |= (EPOLLOUT | EPOLLWRNORM);
186 read_unlock_irqrestore(&rs->rs_recv_lock, flags); 186 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
187 187
188 /* clear state any time we wake a seen-congested socket */ 188 /* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 75d43dc8e96b..5aa3a64aa4f0 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
114 rs, &addr, (int)ntohs(*port)); 114 rs, &addr, (int)ntohs(*port));
115 break; 115 break;
116 } else { 116 } else {
117 rs->rs_bound_addr = 0;
117 rds_sock_put(rs); 118 rds_sock_put(rs);
118 ret = -ENOMEM; 119 ret = -ENOMEM;
119 break; 120 break;
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8398fee7c866..63da9d2f142d 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
219 spin_lock_irqsave(&rds_cong_lock, flags); 219 spin_lock_irqsave(&rds_cong_lock, flags);
220 220
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 struct rds_conn_path *cp = &conn->c_path[0];
223
224 rcu_read_lock();
225 if (!test_and_set_bit(0, &conn->c_map_queued) &&
226 !rds_destroy_pending(cp->cp_conn)) {
223 rds_stats_inc(s_cong_update_queued); 227 rds_stats_inc(s_cong_update_queued);
224 /* We cannot inline the call to rds_send_xmit() here 228 /* We cannot inline the call to rds_send_xmit() here
225 * for two reasons (both pertaining to a TCP transport): 229 * for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
235 * therefore trigger warnings. 239 * therefore trigger warnings.
236 * Defer the xmit to rds_send_worker() instead. 240 * Defer the xmit to rds_send_worker() instead.
237 */ 241 */
238 queue_delayed_work(rds_wq, 242 queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
239 &conn->c_path[0].cp_send_w, 0);
240 } 243 }
244 rcu_read_unlock();
241 } 245 }
242 246
243 spin_unlock_irqrestore(&rds_cong_lock, flags); 247 spin_unlock_irqrestore(&rds_cong_lock, flags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ee2d5d68b78..94e190febfdd 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -220,8 +220,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
220 is_outgoing); 220 is_outgoing);
221 conn->c_path[i].cp_index = i; 221 conn->c_path[i].cp_index = i;
222 } 222 }
223 ret = trans->conn_alloc(conn, gfp); 223 rcu_read_lock();
224 if (rds_destroy_pending(conn))
225 ret = -ENETDOWN;
226 else
227 ret = trans->conn_alloc(conn, gfp);
224 if (ret) { 228 if (ret) {
229 rcu_read_unlock();
225 kfree(conn->c_path); 230 kfree(conn->c_path);
226 kmem_cache_free(rds_conn_slab, conn); 231 kmem_cache_free(rds_conn_slab, conn);
227 conn = ERR_PTR(ret); 232 conn = ERR_PTR(ret);
@@ -230,8 +235,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
230 235
231 rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", 236 rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
232 conn, &laddr, &faddr, 237 conn, &laddr, &faddr,
233 trans->t_name ? trans->t_name : "[unknown]", 238 strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
234 is_outgoing ? "(outgoing)" : ""); 239 "[unknown]", is_outgoing ? "(outgoing)" : "");
235 240
236 /* 241 /*
237 * Since we ran without holding the conn lock, someone could 242 * Since we ran without holding the conn lock, someone could
@@ -283,6 +288,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
283 } 288 }
284 } 289 }
285 spin_unlock_irqrestore(&rds_conn_lock, flags); 290 spin_unlock_irqrestore(&rds_conn_lock, flags);
291 rcu_read_unlock();
286 292
287out: 293out:
288 return conn; 294 return conn;
@@ -403,6 +409,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
403 if (cp->cp_xmit_rm) 409 if (cp->cp_xmit_rm)
404 rds_message_put(cp->cp_xmit_rm); 410 rds_message_put(cp->cp_xmit_rm);
405 411
412 WARN_ON(delayed_work_pending(&cp->cp_send_w));
413 WARN_ON(delayed_work_pending(&cp->cp_recv_w));
414 WARN_ON(delayed_work_pending(&cp->cp_conn_w));
415 WARN_ON(work_pending(&cp->cp_down_w));
416
406 cp->cp_conn->c_trans->conn_free(cp->cp_transport_data); 417 cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
407} 418}
408 419
@@ -424,7 +435,6 @@ void rds_conn_destroy(struct rds_connection *conn)
424 "%pI4\n", conn, &conn->c_laddr, 435 "%pI4\n", conn, &conn->c_laddr,
425 &conn->c_faddr); 436 &conn->c_faddr);
426 437
427 conn->c_destroy_in_prog = 1;
428 /* Ensure conn will not be scheduled for reconnect */ 438 /* Ensure conn will not be scheduled for reconnect */
429 spin_lock_irq(&rds_conn_lock); 439 spin_lock_irq(&rds_conn_lock);
430 hlist_del_init_rcu(&conn->c_hash_node); 440 hlist_del_init_rcu(&conn->c_hash_node);
@@ -445,7 +455,6 @@ void rds_conn_destroy(struct rds_connection *conn)
445 */ 455 */
446 rds_cong_remove_conn(conn); 456 rds_cong_remove_conn(conn);
447 457
448 put_net(conn->c_net);
449 kfree(conn->c_path); 458 kfree(conn->c_path);
450 kmem_cache_free(rds_conn_slab, conn); 459 kmem_cache_free(rds_conn_slab, conn);
451 460
@@ -684,10 +693,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
684{ 693{
685 atomic_set(&cp->cp_state, RDS_CONN_ERROR); 694 atomic_set(&cp->cp_state, RDS_CONN_ERROR);
686 695
687 if (!destroy && cp->cp_conn->c_destroy_in_prog) 696 rcu_read_lock();
697 if (!destroy && rds_destroy_pending(cp->cp_conn)) {
698 rcu_read_unlock();
688 return; 699 return;
689 700 }
690 queue_work(rds_wq, &cp->cp_down_w); 701 queue_work(rds_wq, &cp->cp_down_w);
702 rcu_read_unlock();
691} 703}
692EXPORT_SYMBOL_GPL(rds_conn_path_drop); 704EXPORT_SYMBOL_GPL(rds_conn_path_drop);
693 705
@@ -704,9 +716,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
704 */ 716 */
705void rds_conn_path_connect_if_down(struct rds_conn_path *cp) 717void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
706{ 718{
719 rcu_read_lock();
720 if (rds_destroy_pending(cp->cp_conn)) {
721 rcu_read_unlock();
722 return;
723 }
707 if (rds_conn_path_state(cp) == RDS_CONN_DOWN && 724 if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
708 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) 725 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
709 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); 726 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
727 rcu_read_unlock();
710} 728}
711EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); 729EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
712 730
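From here on, one pattern repeats throughout net/rds: before queueing deferred work on a connection path, take rcu_read_lock() and test the new rds_destroy_pending() helper, so that no work can be scheduled once the owning namespace is exiting or the transport module is unloading. Distilled into a single hypothetical helper (cp, rds_wq and cp_send_w are the patch's own symbols):

    static void demo_schedule_send(struct rds_conn_path *cp)
    {
            rcu_read_lock();
            if (!rds_destroy_pending(cp->cp_conn))
                    queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
            rcu_read_unlock();
    }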
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..50a88f3e7e39 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -48,6 +48,7 @@
48static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; 48static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
49static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; 49static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
50unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 50unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
51static atomic_t rds_ib_unloading;
51 52
52module_param(rds_ib_mr_1m_pool_size, int, 0444); 53module_param(rds_ib_mr_1m_pool_size, int, 0444);
53MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); 54MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
@@ -301,13 +302,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
301 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); 302 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
302 if (rds_conn_state(conn) == RDS_CONN_UP) { 303 if (rds_conn_state(conn) == RDS_CONN_UP) {
303 struct rds_ib_device *rds_ibdev; 304 struct rds_ib_device *rds_ibdev;
304 struct rdma_dev_addr *dev_addr;
305 305
306 ic = conn->c_transport_data; 306 ic = conn->c_transport_data;
307 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
308 307
309 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 308 rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
310 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 309 (union ib_gid *)&iinfo->dst_gid);
311 310
312 rds_ibdev = ic->rds_ibdev; 311 rds_ibdev = ic->rds_ibdev;
313 iinfo->max_send_wr = ic->i_send_ring.w_nr; 312 iinfo->max_send_wr = ic->i_send_ring.w_nr;
@@ -347,7 +346,8 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
347 /* Create a CMA ID and try to bind it. This catches both 346 /* Create a CMA ID and try to bind it. This catches both
348 * IB and iWARP capable NICs. 347 * IB and iWARP capable NICs.
349 */ 348 */
350 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 349 cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler,
350 NULL, RDMA_PS_TCP, IB_QPT_RC);
351 if (IS_ERR(cm_id)) 351 if (IS_ERR(cm_id))
352 return PTR_ERR(cm_id); 352 return PTR_ERR(cm_id);
353 353
@@ -379,8 +379,23 @@ static void rds_ib_unregister_client(void)
379 flush_workqueue(rds_wq); 379 flush_workqueue(rds_wq);
380} 380}
381 381
382static void rds_ib_set_unloading(void)
383{
384 atomic_set(&rds_ib_unloading, 1);
385}
386
387static bool rds_ib_is_unloading(struct rds_connection *conn)
388{
389 struct rds_conn_path *cp = &conn->c_path[0];
390
391 return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) ||
392 atomic_read(&rds_ib_unloading) != 0);
393}
394
382void rds_ib_exit(void) 395void rds_ib_exit(void)
383{ 396{
397 rds_ib_set_unloading();
398 synchronize_rcu();
384 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 399 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
385 rds_ib_unregister_client(); 400 rds_ib_unregister_client();
386 rds_ib_destroy_nodev_conns(); 401 rds_ib_destroy_nodev_conns();
@@ -414,6 +429,7 @@ struct rds_transport rds_ib_transport = {
414 .flush_mrs = rds_ib_flush_mrs, 429 .flush_mrs = rds_ib_flush_mrs,
415 .t_owner = THIS_MODULE, 430 .t_owner = THIS_MODULE,
416 .t_name = "infiniband", 431 .t_name = "infiniband",
432 .t_unloading = rds_ib_is_unloading,
417 .t_type = RDS_TRANS_IB 433 .t_type = RDS_TRANS_IB
418}; 434};
419 435
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 80fb6f63e768..eea1d8611b20 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -117,6 +117,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
117 &conn->c_laddr, &conn->c_faddr, 117 &conn->c_laddr, &conn->c_faddr,
118 RDS_PROTOCOL_MAJOR(conn->c_version), 118 RDS_PROTOCOL_MAJOR(conn->c_version),
119 RDS_PROTOCOL_MINOR(conn->c_version)); 119 RDS_PROTOCOL_MINOR(conn->c_version));
120 set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
120 rds_conn_destroy(conn); 121 rds_conn_destroy(conn);
121 return; 122 return;
122 } else { 123 } else {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c349c71babff..7301b9b01890 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -88,6 +88,7 @@ enum {
88#define RDS_RECONNECT_PENDING 1 88#define RDS_RECONNECT_PENDING 1
89#define RDS_IN_XMIT 2 89#define RDS_IN_XMIT 2
90#define RDS_RECV_REFILL 3 90#define RDS_RECV_REFILL 3
91#define RDS_DESTROY_PENDING 4
91 92
92/* Max number of multipaths per RDS connection. Must be a power of 2 */ 93/* Max number of multipaths per RDS connection. Must be a power of 2 */
93#define RDS_MPATH_WORKERS 8 94#define RDS_MPATH_WORKERS 8
@@ -139,8 +140,7 @@ struct rds_connection {
139 __be32 c_faddr; 140 __be32 c_faddr;
140 unsigned int c_loopback:1, 141 unsigned int c_loopback:1,
141 c_ping_triggered:1, 142 c_ping_triggered:1,
142 c_destroy_in_prog:1, 143 c_pad_to_32:30;
143 c_pad_to_32:29;
144 int c_npaths; 144 int c_npaths;
145 struct rds_connection *c_passive; 145 struct rds_connection *c_passive;
146 struct rds_transport *c_trans; 146 struct rds_transport *c_trans;
@@ -150,7 +150,7 @@ struct rds_connection {
150 150
151 /* Protocol version */ 151 /* Protocol version */
152 unsigned int c_version; 152 unsigned int c_version;
153 struct net *c_net; 153 possible_net_t c_net;
154 154
155 struct list_head c_map_item; 155 struct list_head c_map_item;
156 unsigned long c_map_queued; 156 unsigned long c_map_queued;
@@ -165,13 +165,13 @@ struct rds_connection {
165static inline 165static inline
166struct net *rds_conn_net(struct rds_connection *conn) 166struct net *rds_conn_net(struct rds_connection *conn)
167{ 167{
168 return conn->c_net; 168 return read_pnet(&conn->c_net);
169} 169}
170 170
171static inline 171static inline
172void rds_conn_net_set(struct rds_connection *conn, struct net *net) 172void rds_conn_net_set(struct rds_connection *conn, struct net *net)
173{ 173{
174 conn->c_net = get_net(net); 174 write_pnet(&conn->c_net, net);
175} 175}
176 176
177#define RDS_FLAG_CONG_BITMAP 0x01 177#define RDS_FLAG_CONG_BITMAP 0x01
@@ -518,6 +518,7 @@ struct rds_transport {
518 void (*sync_mr)(void *trans_private, int direction); 518 void (*sync_mr)(void *trans_private, int direction);
519 void (*free_mr)(void *trans_private, int invalidate); 519 void (*free_mr)(void *trans_private, int invalidate);
520 void (*flush_mrs)(void); 520 void (*flush_mrs)(void);
521 bool (*t_unloading)(struct rds_connection *conn);
521}; 522};
522 523
523struct rds_sock { 524struct rds_sock {
@@ -862,6 +863,12 @@ static inline void rds_mr_put(struct rds_mr *mr)
862 __rds_put_mr_final(mr); 863 __rds_put_mr_final(mr);
863} 864}
864 865
866static inline bool rds_destroy_pending(struct rds_connection *conn)
867{
868 return !check_net(rds_conn_net(conn)) ||
869 (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
870}
871
865/* stats.c */ 872/* stats.c */
866DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 873DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
867#define rds_stats_inc_which(which, member) do { \ 874#define rds_stats_inc_which(which, member) do { \
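Two rds.h changes carry the patch: c_net becomes a possible_net_t, an unreferenced namespace pointer accessed via read_pnet()/write_pnet() (hence the get_net()/put_net() pair dropped from connection.c), and rds_destroy_pending() combines check_net() with the new per-transport t_unloading hook. The accessor pattern in isolation, with demo_* placeholder names:

    #include <net/net_namespace.h>

    struct demo_obj {
            possible_net_t net;
    };

    static inline void demo_net_set(struct demo_obj *d, struct net *net)
    {
            write_pnet(&d->net, net);       /* store, no get_net() refcount */
    }

    static inline bool demo_net_alive(const struct demo_obj *d)
    {
            return check_net(read_pnet(&d->net));
    }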
diff --git a/net/rds/send.c b/net/rds/send.c
index f72466c63f0c..b1b0022b8370 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,6 +162,12 @@ restart:
162 goto out; 162 goto out;
163 } 163 }
164 164
165 if (rds_destroy_pending(cp->cp_conn)) {
166 release_in_xmit(cp);
167 ret = -ENETUNREACH; /* dont requeue send work */
168 goto out;
169 }
170
165 /* 171 /*
166 * we record the send generation after doing the xmit acquire. 172 * we record the send generation after doing the xmit acquire.
167 * if someone else manages to jump in and do some work, we'll use 173 * if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ over_batch:
437 !list_empty(&cp->cp_send_queue)) && !raced) { 443 !list_empty(&cp->cp_send_queue)) && !raced) {
438 if (batch_count < send_batch_count) 444 if (batch_count < send_batch_count)
439 goto restart; 445 goto restart;
440 queue_delayed_work(rds_wq, &cp->cp_send_w, 1); 446 rcu_read_lock();
447 if (rds_destroy_pending(cp->cp_conn))
448 ret = -ENETUNREACH;
449 else
450 queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
451 rcu_read_unlock();
441 } else if (raced) { 452 } else if (raced) {
442 rds_stats_inc(s_send_lock_queue_raced); 453 rds_stats_inc(s_send_lock_queue_raced);
443 } 454 }
@@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1151 else 1162 else
1152 cpath = &conn->c_path[0]; 1163 cpath = &conn->c_path[0];
1153 1164
1165 if (rds_destroy_pending(conn)) {
1166 ret = -EAGAIN;
1167 goto out;
1168 }
1169
1154 rds_conn_path_connect_if_down(cpath); 1170 rds_conn_path_connect_if_down(cpath);
1155 1171
1156 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1172 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1190 rds_stats_inc(s_send_queued); 1206 rds_stats_inc(s_send_queued);
1191 1207
1192 ret = rds_send_xmit(cpath); 1208 ret = rds_send_xmit(cpath);
1193 if (ret == -ENOMEM || ret == -EAGAIN) 1209 if (ret == -ENOMEM || ret == -EAGAIN) {
1194 queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); 1210 ret = 0;
1195 1211 rcu_read_lock();
1212 if (rds_destroy_pending(cpath->cp_conn))
1213 ret = -ENETUNREACH;
1214 else
1215 queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
1216 rcu_read_unlock();
1217 }
1218 if (ret)
1219 goto out;
1196 rds_message_put(rm); 1220 rds_message_put(rm);
1197 return payload_len; 1221 return payload_len;
1198 1222
@@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
1270 rds_stats_inc(s_send_pong); 1294 rds_stats_inc(s_send_pong);
1271 1295
1272 /* schedule the send work on rds_wq */ 1296 /* schedule the send work on rds_wq */
1273 queue_delayed_work(rds_wq, &cp->cp_send_w, 1); 1297 rcu_read_lock();
1298 if (!rds_destroy_pending(cp->cp_conn))
1299 queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
1300 rcu_read_unlock();
1274 1301
1275 rds_message_put(rm); 1302 rds_message_put(rm);
1276 return 0; 1303 return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ab7356e0ba83..44c4652721af 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -49,6 +49,7 @@ static unsigned int rds_tcp_tc_count;
49/* Track rds_tcp_connection structs so they can be cleaned up */ 49/* Track rds_tcp_connection structs so they can be cleaned up */
50static DEFINE_SPINLOCK(rds_tcp_conn_lock); 50static DEFINE_SPINLOCK(rds_tcp_conn_lock);
51static LIST_HEAD(rds_tcp_conn_list); 51static LIST_HEAD(rds_tcp_conn_list);
52static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
52 53
53static struct kmem_cache *rds_tcp_conn_slab; 54static struct kmem_cache *rds_tcp_conn_slab;
54 55
@@ -271,16 +272,32 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
271 return -EADDRNOTAVAIL; 272 return -EADDRNOTAVAIL;
272} 273}
273 274
275static void rds_tcp_conn_free(void *arg)
276{
277 struct rds_tcp_connection *tc = arg;
278
279 rdsdebug("freeing tc %p\n", tc);
280
281 spin_lock_bh(&rds_tcp_conn_lock);
282 if (!tc->t_tcp_node_detached)
283 list_del(&tc->t_tcp_node);
284 spin_unlock_bh(&rds_tcp_conn_lock);
285
286 kmem_cache_free(rds_tcp_conn_slab, tc);
287}
288
274static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) 289static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
275{ 290{
276 struct rds_tcp_connection *tc; 291 struct rds_tcp_connection *tc;
277 int i; 292 int i, j;
293 int ret = 0;
278 294
279 for (i = 0; i < RDS_MPATH_WORKERS; i++) { 295 for (i = 0; i < RDS_MPATH_WORKERS; i++) {
280 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 296 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
281 if (!tc) 297 if (!tc) {
282 return -ENOMEM; 298 ret = -ENOMEM;
283 299 goto fail;
300 }
284 mutex_init(&tc->t_conn_path_lock); 301 mutex_init(&tc->t_conn_path_lock);
285 tc->t_sock = NULL; 302 tc->t_sock = NULL;
286 tc->t_tinc = NULL; 303 tc->t_tinc = NULL;
@@ -289,28 +306,24 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
289 306
290 conn->c_path[i].cp_transport_data = tc; 307 conn->c_path[i].cp_transport_data = tc;
291 tc->t_cpath = &conn->c_path[i]; 308 tc->t_cpath = &conn->c_path[i];
309 tc->t_tcp_node_detached = true;
292 310
293 spin_lock_irq(&rds_tcp_conn_lock);
294 list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
295 spin_unlock_irq(&rds_tcp_conn_lock);
296 rdsdebug("rds_conn_path [%d] tc %p\n", i, 311 rdsdebug("rds_conn_path [%d] tc %p\n", i,
297 conn->c_path[i].cp_transport_data); 312 conn->c_path[i].cp_transport_data);
298 } 313 }
299 314 spin_lock_bh(&rds_tcp_conn_lock);
300 return 0; 315 for (i = 0; i < RDS_MPATH_WORKERS; i++) {
301} 316 tc = conn->c_path[i].cp_transport_data;
302 317 tc->t_tcp_node_detached = false;
303static void rds_tcp_conn_free(void *arg) 318 list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
304{ 319 }
305 struct rds_tcp_connection *tc = arg; 320 spin_unlock_bh(&rds_tcp_conn_lock);
306 unsigned long flags; 321fail:
307 rdsdebug("freeing tc %p\n", tc); 322 if (ret) {
308 323 for (j = 0; j < i; j++)
309 spin_lock_irqsave(&rds_tcp_conn_lock, flags); 324 rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
310 list_del(&tc->t_tcp_node); 325 }
311 spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); 326 return ret;
312
313 kmem_cache_free(rds_tcp_conn_slab, tc);
314} 327}
315 328
316static bool list_has_conn(struct list_head *list, struct rds_connection *conn) 329static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
@@ -324,6 +337,16 @@ static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
324 return false; 337 return false;
325} 338}
326 339
340static void rds_tcp_set_unloading(void)
341{
342 atomic_set(&rds_tcp_unloading, 1);
343}
344
345static bool rds_tcp_is_unloading(struct rds_connection *conn)
346{
347 return atomic_read(&rds_tcp_unloading) != 0;
348}
349
327static void rds_tcp_destroy_conns(void) 350static void rds_tcp_destroy_conns(void)
328{ 351{
329 struct rds_tcp_connection *tc, *_tc; 352 struct rds_tcp_connection *tc, *_tc;
@@ -362,6 +385,7 @@ struct rds_transport rds_tcp_transport = {
362 .t_type = RDS_TRANS_TCP, 385 .t_type = RDS_TRANS_TCP,
363 .t_prefer_loopback = 1, 386 .t_prefer_loopback = 1,
364 .t_mp_capable = 1, 387 .t_mp_capable = 1,
388 .t_unloading = rds_tcp_is_unloading,
365}; 389};
366 390
367static unsigned int rds_tcp_netid; 391static unsigned int rds_tcp_netid;
@@ -496,27 +520,6 @@ static struct pernet_operations rds_tcp_net_ops = {
496 .size = sizeof(struct rds_tcp_net), 520 .size = sizeof(struct rds_tcp_net),
497}; 521};
498 522
499/* explicitly send a RST on each socket, thereby releasing any socket refcnts
500 * that may otherwise hold up netns deletion.
501 */
502static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
503{
504 struct rds_conn_path *cp;
505 struct rds_tcp_connection *tc;
506 int i;
507 struct sock *sk;
508
509 for (i = 0; i < RDS_MPATH_WORKERS; i++) {
510 cp = &conn->c_path[i];
511 tc = cp->cp_transport_data;
512 if (!tc->t_sock)
513 continue;
514 sk = tc->t_sock->sk;
515 sk->sk_prot->disconnect(sk, 0);
516 tcp_done(sk);
517 }
518}
519
520static void rds_tcp_kill_sock(struct net *net) 523static void rds_tcp_kill_sock(struct net *net)
521{ 524{
522 struct rds_tcp_connection *tc, *_tc; 525 struct rds_tcp_connection *tc, *_tc;
@@ -526,20 +529,22 @@ static void rds_tcp_kill_sock(struct net *net)
526 529
527 rtn->rds_tcp_listen_sock = NULL; 530 rtn->rds_tcp_listen_sock = NULL;
528 rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); 531 rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
529 spin_lock_irq(&rds_tcp_conn_lock); 532 spin_lock_bh(&rds_tcp_conn_lock);
530 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { 533 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
531 struct net *c_net = tc->t_cpath->cp_conn->c_net; 534 struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
532 535
533 if (net != c_net || !tc->t_sock) 536 if (net != c_net || !tc->t_sock)
534 continue; 537 continue;
535 if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) 538 if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
536 list_move_tail(&tc->t_tcp_node, &tmp_list); 539 list_move_tail(&tc->t_tcp_node, &tmp_list);
540 } else {
541 list_del(&tc->t_tcp_node);
542 tc->t_tcp_node_detached = true;
543 }
537 } 544 }
538 spin_unlock_irq(&rds_tcp_conn_lock); 545 spin_unlock_bh(&rds_tcp_conn_lock);
539 list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { 546 list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
540 rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
541 rds_conn_destroy(tc->t_cpath->cp_conn); 547 rds_conn_destroy(tc->t_cpath->cp_conn);
542 }
543} 548}
544 549
545void *rds_tcp_listen_sock_def_readable(struct net *net) 550void *rds_tcp_listen_sock_def_readable(struct net *net)
@@ -585,9 +590,9 @@ static void rds_tcp_sysctl_reset(struct net *net)
585{ 590{
586 struct rds_tcp_connection *tc, *_tc; 591 struct rds_tcp_connection *tc, *_tc;
587 592
588 spin_lock_irq(&rds_tcp_conn_lock); 593 spin_lock_bh(&rds_tcp_conn_lock);
589 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { 594 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
590 struct net *c_net = tc->t_cpath->cp_conn->c_net; 595 struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
591 596
592 if (net != c_net || !tc->t_sock) 597 if (net != c_net || !tc->t_sock)
593 continue; 598 continue;
@@ -595,7 +600,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
595 /* reconnect with new parameters */ 600 /* reconnect with new parameters */
596 rds_conn_path_drop(tc->t_cpath, false); 601 rds_conn_path_drop(tc->t_cpath, false);
597 } 602 }
598 spin_unlock_irq(&rds_tcp_conn_lock); 603 spin_unlock_bh(&rds_tcp_conn_lock);
599} 604}
600 605
601static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, 606static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -618,6 +623,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
618 623
619static void rds_tcp_exit(void) 624static void rds_tcp_exit(void)
620{ 625{
626 rds_tcp_set_unloading();
627 synchronize_rcu();
621 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 628 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
622 unregister_pernet_subsys(&rds_tcp_net_ops); 629 unregister_pernet_subsys(&rds_tcp_net_ops);
623 if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) 630 if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
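rds_tcp_conn_alloc() is restructured so a connection's paths only join the global list once every allocation has succeeded, and rds_tcp_conn_free() tolerates entries that rds_tcp_kill_sock() already unhooked via the new t_tcp_node_detached flag; the list lock also drops from _irq to _bh since all users run in process or softirq context. The detach idiom by itself, with demo_* placeholders:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_entry {
            struct list_head node;
            bool detached;          /* true once unhooked from the list */
    };

    static DEFINE_SPINLOCK(demo_lock);

    static void demo_detach(struct demo_entry *e)
    {
            spin_lock_bh(&demo_lock);
            if (!e->detached) {
                    list_del(&e->node);
                    e->detached = true;
            }
            spin_unlock_bh(&demo_lock);
    }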
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 864ca7d8f019..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
12struct rds_tcp_connection { 12struct rds_tcp_connection {
13 13
14 struct list_head t_tcp_node; 14 struct list_head t_tcp_node;
15 bool t_tcp_node_detached;
15 struct rds_conn_path *t_cpath; 16 struct rds_conn_path *t_cpath;
16 /* t_conn_path_lock synchronizes the connection establishment between 17 /* t_conn_path_lock synchronizes the connection establishment between
17 * rds_tcp_accept_one and rds_tcp_conn_path_connect 18 * rds_tcp_accept_one and rds_tcp_conn_path_connect
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 46f74dad0e16..d999e7075645 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 		 cp->cp_conn, tc, sock);
 
 	if (sock) {
-		if (cp->cp_conn->c_destroy_in_prog)
+		if (rds_destroy_pending(cp->cp_conn))
 			rds_tcp_set_linger(sock);
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
 		lock_sock(sock->sk);
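rds_tcp_set_linger() is only applied when the connection is going away for good; the usual effect of a zero-timeout SO_LINGER is that closing the socket sends an RST rather than parking it in TIME_WAIT. A hedged sketch of putting a kernel socket into that mode, assuming the in-kernel kernel_setsockopt() helper of this kernel generation (the function name here is illustrative, not the RDS implementation):

	#include <linux/net.h>
	#include <linux/socket.h>

	static void example_set_linger(struct socket *sock)
	{
		struct linger no_linger = {
			.l_onoff  = 1,	/* linger enabled...		*/
			.l_linger = 0,	/* ...with zero timeout: RST	*/
		};

		kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
				  (char *)&no_linger, sizeof(no_linger));
	}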
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e006ef8e6d40..b9fbd2ee74ef 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
-		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+		rcu_read_lock();
+		if (!rds_destroy_pending(cp->cp_conn))
+			queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+		rcu_read_unlock();
+	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 9b76e0fa1722..7df869d37afd 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	rcu_read_lock();
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+	    !rds_destroy_pending(cp->cp_conn))
 		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+	rcu_read_unlock();
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index f121daa402c8..c52861d77a59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
 
 	cp->cp_reconnect_jiffies = 0;
 	set_bit(0, &cp->cp_conn->c_map_queued);
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
-	queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	rcu_read_lock();
+	if (!rds_destroy_pending(cp->cp_conn)) {
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_connect_path_complete);
 
@@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	if (cp->cp_reconnect_jiffies == 0) {
 		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_lock();
+		if (!rds_destroy_pending(cp->cp_conn))
+			queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
 		 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
 		 conn, &conn->c_laddr, &conn->c_faddr);
-	queue_delayed_work(rds_wq, &cp->cp_conn_w,
-			   rand % cp->cp_reconnect_jiffies);
+	rcu_read_lock();
+	if (!rds_destroy_pending(cp->cp_conn))
+		queue_delayed_work(rds_wq, &cp->cp_conn_w,
+				   rand % cp->cp_reconnect_jiffies);
+	rcu_read_unlock();
 
 	cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
 				       rds_sysctl_reconnect_max_jiffies);
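Beyond the new destroy-pending guards, the tail of rds_queue_reconnect() shows the reconnect policy itself: pick a random delay within the current window, then double the window up to a sysctl ceiling. A minimal standalone sketch of that randomized exponential backoff (illustrative helper, not the RDS code):

	#include <linux/kernel.h>
	#include <linux/random.h>

	/* Returns the next delay in jiffies and widens *window for the next
	 * attempt, bounded by max_j, mirroring the doubling above. */
	static unsigned long next_reconnect_delay(unsigned long *window,
						  unsigned long min_j,
						  unsigned long max_j)
	{
		unsigned long rand;

		if (*window == 0)
			*window = min_j;
		get_random_bytes(&rand, sizeof(rand));
		rand %= *window;
		*window = min(*window * 2, max_j);
		return rand;
	}

The randomization keeps a fleet of peers that lost connectivity at the same moment from reconnecting in lockstep.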
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 2064c3a35ef8..59d0eb960275 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1139,16 +1139,16 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 }
 
-static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
+static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
 {
 	struct rfkill_data *data = file->private_data;
-	unsigned int res = POLLOUT | POLLWRNORM;
+	__poll_t res = EPOLLOUT | EPOLLWRNORM;
 
 	poll_wait(file, &data->read_wait, wait);
 
 	mutex_lock(&data->mtx);
 	if (!list_empty(&data->events))
-		res = POLLIN | POLLRDNORM;
+		res = EPOLLIN | EPOLLRDNORM;
 	mutex_unlock(&data->mtx);
 
 	return res;
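This conversion (repeated below for rxrpc) swaps the plain unsigned int poll mask for the bitwise __poll_t type, which lets sparse catch code that mixes poll bits with ordinary integers; the EPOLL* constants carry the __poll_t type, whereas the legacy POLL* values are plain ints. The shape of a converted handler, as a self-contained sketch with illustrative state:

	#include <linux/poll.h>
	#include <linux/wait.h>

	/* Illustrative device state, not the rfkill structures. */
	static DECLARE_WAIT_QUEUE_HEAD(example_wq);
	static bool example_has_events;

	static __poll_t example_poll(struct file *file, poll_table *wait)
	{
		__poll_t mask = EPOLLOUT | EPOLLWRNORM;	/* always writable */

		poll_wait(file, &example_wq, wait);
		if (example_has_events)
			mask |= EPOLLIN | EPOLLRDNORM;
		return mask;
	}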
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6a5c4992cf61..083bd251406f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1461,7 +1461,6 @@ static int rose_info_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations rose_info_fops = {
-	.owner = THIS_MODULE,
 	.open = rose_info_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 8ca3124df83f..178619ddab68 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1156,7 +1156,6 @@ static int rose_nodes_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations rose_nodes_fops = {
-	.owner = THIS_MODULE,
 	.open = rose_nodes_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -1240,7 +1239,6 @@ static int rose_neigh_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations rose_neigh_fops = {
-	.owner = THIS_MODULE,
 	.open = rose_neigh_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -1326,7 +1324,6 @@ static int rose_route_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations rose_routes_fops = {
-	.owner = THIS_MODULE,
 	.open = rose_route_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index dcd818fa837e..0c9c18aa7c77 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -729,12 +729,12 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
 /*
  * permit an RxRPC socket to be polled
  */
-static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
 			       poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
-	unsigned int mask;
+	__poll_t mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
@@ -742,13 +742,13 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
 	/* the socket is readable if there are any messages waiting on the Rx
 	 * queue */
 	if (!list_empty(&rx->recvmsg_q))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* the socket is writable if there is space to add new data to the
 	 * socket; there is no guarantee that any particular call in progress
 	 * on the socket may have space in the Tx ACK window */
 	if (rxrpc_writable(sk))
-		mask |= POLLOUT | POLLWRNORM;
+		mask |= EPOLLOUT | EPOLLWRNORM;
 
 	return mask;
 }
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 7f74ca3059f8..064175068059 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -834,7 +834,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	 * can be skipped if we find a follow-on call.  The first DATA packet
 	 * of the follow on call will implicitly ACK this call.
 	 */
-	if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+	if (call->completion == RXRPC_CALL_SUCCEEDED &&
+	    test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
 		unsigned long final_ack_at = jiffies + 2;
 
 		WRITE_ONCE(chan->final_ack_at, final_ack_at);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 4ca11be6be3c..b1dfae107431 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -460,6 +460,7 @@ void rxrpc_process_connection(struct work_struct *work)
 	case -EKEYEXPIRED:
 	case -EKEYREJECTED:
 		goto protocol_error;
+	case -ENOMEM:
 	case -EAGAIN:
 		goto requeue_and_leave;
 	case -ECONNABORTED:
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index c628351eb900..ccbac190add1 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -177,13 +177,21 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
 	 * through the channel, whilst disposing of the actual call record.
 	 */
 	trace_rxrpc_disconnect_call(call);
-	if (call->abort_code) {
-		chan->last_abort = call->abort_code;
-		chan->last_type = RXRPC_PACKET_TYPE_ABORT;
-	} else {
+	switch (call->completion) {
+	case RXRPC_CALL_SUCCEEDED:
 		chan->last_seq = call->rx_hard_ack;
 		chan->last_type = RXRPC_PACKET_TYPE_ACK;
+		break;
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		chan->last_abort = call->abort_code;
+		chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+		break;
+	default:
+		chan->last_abort = RX_USER_ABORT;
+		chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+		break;
 	}
+
 	/* Sync with rxrpc_conn_retransmit(). */
 	smp_wmb();
 	chan->last_call = chan->call_id;
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 7421656963a9..f79f260c6ddc 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -125,7 +125,6 @@ static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations rxrpc_call_seq_fops = {
-	.owner = THIS_MODULE,
 	.open = rxrpc_call_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -217,7 +216,6 @@ static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
 }
 
 const struct file_operations rxrpc_connection_seq_fops = {
-	.owner = THIS_MODULE,
 	.open = rxrpc_connection_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index c38b3a1de56c..77cb23c7bd0a 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -773,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 {
 	const struct rxrpc_key_token *token;
 	struct rxkad_challenge challenge;
-	struct rxkad_response resp
-		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxkad_response *resp;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	const char *eproto;
 	u32 version, nonce, min_level, abort_code;
@@ -818,26 +817,29 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 	token = conn->params.key->payload.data[0];
 
 	/* build the response packet */
-	memset(&resp, 0, sizeof(resp));
-
-	resp.version = htonl(RXKAD_VERSION);
-	resp.encrypted.epoch = htonl(conn->proto.epoch);
-	resp.encrypted.cid = htonl(conn->proto.cid);
-	resp.encrypted.securityIndex = htonl(conn->security_ix);
-	resp.encrypted.inc_nonce = htonl(nonce + 1);
-	resp.encrypted.level = htonl(conn->params.security_level);
-	resp.kvno = htonl(token->kad->kvno);
-	resp.ticket_len = htonl(token->kad->ticket_len);
-
-	resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
-	resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
-	resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
-	resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
+	resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
+	if (!resp)
+		return -ENOMEM;
+
+	resp->version = htonl(RXKAD_VERSION);
+	resp->encrypted.epoch = htonl(conn->proto.epoch);
+	resp->encrypted.cid = htonl(conn->proto.cid);
+	resp->encrypted.securityIndex = htonl(conn->security_ix);
+	resp->encrypted.inc_nonce = htonl(nonce + 1);
+	resp->encrypted.level = htonl(conn->params.security_level);
+	resp->kvno = htonl(token->kad->kvno);
+	resp->ticket_len = htonl(token->kad->ticket_len);
+	resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+	resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+	resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+	resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
 
 	/* calculate the response checksum and then do the encryption */
-	rxkad_calc_response_checksum(&resp);
-	rxkad_encrypt_response(conn, &resp, token->kad);
-	return rxkad_send_response(conn, &sp->hdr, &resp, token->kad);
+	rxkad_calc_response_checksum(resp);
+	rxkad_encrypt_response(conn, resp, token->kad);
+	ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
+	kfree(resp);
+	return ret;
 
 protocol_error:
 	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
@@ -1048,8 +1050,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 				 struct sk_buff *skb,
 				 u32 *_abort_code)
 {
-	struct rxkad_response response
-		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxkad_response *response;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt session_key;
 	const char *eproto;
@@ -1061,17 +1062,22 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 
 	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
 
+	ret = -ENOMEM;
+	response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
+	if (!response)
+		goto temporary_error;
+
 	eproto = tracepoint_string("rxkad_rsp_short");
 	abort_code = RXKADPACKETSHORT;
 	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
-			  &response, sizeof(response)) < 0)
+			  response, sizeof(*response)) < 0)
 		goto protocol_error;
-	if (!pskb_pull(skb, sizeof(response)))
+	if (!pskb_pull(skb, sizeof(*response)))
 		BUG();
 
-	version = ntohl(response.version);
-	ticket_len = ntohl(response.ticket_len);
-	kvno = ntohl(response.kvno);
+	version = ntohl(response->version);
+	ticket_len = ntohl(response->ticket_len);
+	kvno = ntohl(response->kvno);
 	_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
 	       sp->hdr.serial, version, kvno, ticket_len);
 
@@ -1105,31 +1111,31 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
 				   &expiry, _abort_code);
 	if (ret < 0)
-		goto temporary_error_free;
+		goto temporary_error_free_resp;
 
 	/* use the session key from inside the ticket to decrypt the
 	 * response */
-	rxkad_decrypt_response(conn, &response, &session_key);
+	rxkad_decrypt_response(conn, response, &session_key);
 
 	eproto = tracepoint_string("rxkad_rsp_param");
 	abort_code = RXKADSEALEDINCON;
-	if (ntohl(response.encrypted.epoch) != conn->proto.epoch)
+	if (ntohl(response->encrypted.epoch) != conn->proto.epoch)
 		goto protocol_error_free;
-	if (ntohl(response.encrypted.cid) != conn->proto.cid)
+	if (ntohl(response->encrypted.cid) != conn->proto.cid)
 		goto protocol_error_free;
-	if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
+	if (ntohl(response->encrypted.securityIndex) != conn->security_ix)
 		goto protocol_error_free;
-	csum = response.encrypted.checksum;
-	response.encrypted.checksum = 0;
-	rxkad_calc_response_checksum(&response);
+	csum = response->encrypted.checksum;
+	response->encrypted.checksum = 0;
+	rxkad_calc_response_checksum(response);
 	eproto = tracepoint_string("rxkad_rsp_csum");
-	if (response.encrypted.checksum != csum)
+	if (response->encrypted.checksum != csum)
 		goto protocol_error_free;
 
 	spin_lock(&conn->channel_lock);
 	for (i = 0; i < RXRPC_MAXCALLS; i++) {
 		struct rxrpc_call *call;
-		u32 call_id = ntohl(response.encrypted.call_id[i]);
+		u32 call_id = ntohl(response->encrypted.call_id[i]);
 
 		eproto = tracepoint_string("rxkad_rsp_callid");
 		if (call_id > INT_MAX)
@@ -1153,12 +1159,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 
 	eproto = tracepoint_string("rxkad_rsp_seq");
 	abort_code = RXKADOUTOFSEQUENCE;
-	if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
+	if (ntohl(response->encrypted.inc_nonce) != conn->security_nonce + 1)
 		goto protocol_error_free;
 
 	eproto = tracepoint_string("rxkad_rsp_level");
 	abort_code = RXKADLEVELFAIL;
-	level = ntohl(response.encrypted.level);
+	level = ntohl(response->encrypted.level);
 	if (level > RXRPC_SECURITY_ENCRYPT)
 		goto protocol_error_free;
 	conn->params.security_level = level;
@@ -1168,9 +1174,10 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	 * as for a client connection */
 	ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
 	if (ret < 0)
-		goto temporary_error_free;
+		goto temporary_error_free_ticket;
 
 	kfree(ticket);
+	kfree(response);
 	_leave(" = 0");
 	return 0;
 
@@ -1179,12 +1186,15 @@ protocol_error_unlock:
 protocol_error_free:
 	kfree(ticket);
 protocol_error:
+	kfree(response);
 	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
 	*_abort_code = abort_code;
 	return -EPROTO;
 
-temporary_error_free:
+temporary_error_free_ticket:
 	kfree(ticket);
+temporary_error_free_resp:
+	kfree(response);
 temporary_error:
 	/* Ignore the response packet if we got a temporary error such as
 	 * ENOMEM.  We just want to send the challenge again.  Note that we
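The motivation for moving struct rxkad_response off the stack is twofold: the structure is large for a kernel stack frame, and the crypto layer needs aligned memory, which the old __attribute__((aligned(8))) stack object provided only awkwardly. Memory from kzalloc() is at least ARCH_KMALLOC_MINALIGN-aligned, which satisfies that requirement, at the cost of having to kfree() on every exit path (hence the relabeled temporary_error_free_* goto ladder). A minimal sketch of the idiom, with an illustrative structure:

	#include <linux/slab.h>
	#include <linux/types.h>

	struct big_crypto_buf {
		u8 payload[1024];	/* too large to live on the stack */
	};

	/* kzalloc() returns zeroed, suitably aligned memory; GFP_NOFS mirrors
	 * the allocation context used in the patch above. The caller owns the
	 * buffer and must kfree() it on every return path. */
	static struct big_crypto_buf *alloc_crypto_buf(void)
	{
		return kzalloc(sizeof(struct big_crypto_buf), GFP_NOFS);
	}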
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c03d86a7775e..f24a6ae6819a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -857,17 +857,14 @@ config NET_ACT_TUNNEL_KEY
 config NET_IFE_SKBMARK
 	tristate "Support to encoding decoding skb mark on IFE action"
 	depends on NET_ACT_IFE
-	---help---
 
 config NET_IFE_SKBPRIO
 	tristate "Support to encoding decoding skb prio on IFE action"
 	depends on NET_ACT_IFE
-	---help---
 
 config NET_IFE_SKBTCINDEX
 	tristate "Support to encoding decoding skb tcindex on IFE action"
 	depends on NET_ACT_IFE
-	---help---
 
 config NET_CLS_IND
 	bool "Incoming device classification"
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4d33a50a8a6d..eba6682727dd 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,7 @@ static void free_tcf(struct tc_action *p)
 static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
 {
 	spin_lock_bh(&idrinfo->lock);
-	idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
+	idr_remove(&idrinfo->action_idr, p->tcfa_index);
 	spin_unlock_bh(&idrinfo->lock);
 	gen_kill_estimator(&p->tcfa_rate_est);
 	free_tcf(p);
@@ -99,7 +99,7 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 		p->tcfa_refcnt--;
 	if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
 		if (p->ops->cleanup)
-			p->ops->cleanup(p, bind);
+			p->ops->cleanup(p);
 		tcf_idr_remove(p->idrinfo, p);
 		ret = ACT_P_DELETED;
 	}
@@ -124,7 +124,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 
 	s_i = cb->args[0];
 
-	idr_for_each_entry_ext(idr, p, id) {
+	idr_for_each_entry_ul(idr, p, id) {
 		index++;
 		if (index < s_i)
 			continue;
@@ -181,7 +181,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 	if (nla_put_string(skb, TCA_KIND, ops->kind))
 		goto nla_put_failure;
 
-	idr_for_each_entry_ext(idr, p, id) {
+	idr_for_each_entry_ul(idr, p, id) {
 		ret = __tcf_idr_release(p, false, true);
 		if (ret == ACT_P_DELETED) {
 			module_put(ops->owner);
@@ -222,7 +222,7 @@ static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
 	struct tc_action *p = NULL;
 
 	spin_lock_bh(&idrinfo->lock);
-	p = idr_find_ext(&idrinfo->action_idr, index);
+	p = idr_find(&idrinfo->action_idr, index);
 	spin_unlock_bh(&idrinfo->lock);
 
 	return p;
@@ -274,7 +274,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 	struct idr *idr = &idrinfo->action_idr;
 	int err = -ENOMEM;
-	unsigned long idr_index;
 
 	if (unlikely(!p))
 		return -ENOMEM;
@@ -284,45 +283,28 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 
 	if (cpustats) {
 		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
-		if (!p->cpu_bstats) {
-err1:
-			kfree(p);
-			return err;
-		}
-		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
-		if (!p->cpu_qstats) {
-err2:
-			free_percpu(p->cpu_bstats);
+		if (!p->cpu_bstats)
 			goto err1;
-		}
+		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+		if (!p->cpu_qstats)
+			goto err2;
 	}
 	spin_lock_init(&p->tcfa_lock);
+	idr_preload(GFP_KERNEL);
+	spin_lock_bh(&idrinfo->lock);
 	/* user doesn't specify an index */
 	if (!index) {
-		idr_preload(GFP_KERNEL);
-		spin_lock_bh(&idrinfo->lock);
-		err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0,
-				    GFP_ATOMIC);
-		spin_unlock_bh(&idrinfo->lock);
-		idr_preload_end();
-		if (err) {
-err3:
-			free_percpu(p->cpu_qstats);
-			goto err2;
-		}
-		p->tcfa_index = idr_index;
+		index = 1;
+		err = idr_alloc_u32(idr, NULL, &index, UINT_MAX, GFP_ATOMIC);
 	} else {
-		idr_preload(GFP_KERNEL);
-		spin_lock_bh(&idrinfo->lock);
-		err = idr_alloc_ext(idr, NULL, NULL, index, index + 1,
-				    GFP_ATOMIC);
-		spin_unlock_bh(&idrinfo->lock);
-		idr_preload_end();
-		if (err)
-			goto err3;
-		p->tcfa_index = index;
+		err = idr_alloc_u32(idr, NULL, &index, index, GFP_ATOMIC);
 	}
+	spin_unlock_bh(&idrinfo->lock);
+	idr_preload_end();
+	if (err)
+		goto err3;
 
+	p->tcfa_index = index;
 	p->tcfa_tm.install = jiffies;
 	p->tcfa_tm.lastuse = jiffies;
 	p->tcfa_tm.firstuse = 0;
@@ -330,9 +312,8 @@ err3:
 		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
-		if (err) {
-			goto err3;
-		}
+		if (err)
+			goto err4;
 	}
 
 	p->idrinfo = idrinfo;
@@ -340,6 +321,15 @@ err3:
 	INIT_LIST_HEAD(&p->list);
 	*a = p;
 	return 0;
+err4:
+	idr_remove(idr, index);
+err3:
+	free_percpu(p->cpu_qstats);
+err2:
+	free_percpu(p->cpu_bstats);
+err1:
+	kfree(p);
+	return err;
 }
 EXPORT_SYMBOL(tcf_idr_create);
 
@@ -348,7 +338,7 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
 	spin_lock_bh(&idrinfo->lock);
-	idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index);
+	idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
 	spin_unlock_bh(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
@@ -361,7 +351,7 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
 	int ret;
 	unsigned long id = 1;
 
-	idr_for_each_entry_ext(idr, p, id) {
+	idr_for_each_entry_ul(idr, p, id) {
 		ret = __tcf_idr_release(p, false, true);
 		if (ret == ACT_P_DELETED)
 			module_put(ops->owner);
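The act_api.c changes migrate from the interim idr_*_ext() API to the consolidated IDR interface: idr_alloc_u32() takes the starting id by reference, allocates the lowest free id up to @max, and writes the result back through the pointer, which is why the old err3 label inside the allocation branches could be folded into one common path. A small sketch of the allocation idiom, under the assumption that callers may either request a specific slot or take any free one (names illustrative):

	#include <linux/idr.h>

	static DEFINE_IDR(example_idr);

	/* Returns 0 and the reserved id through *index, or a negative errno
	 * (-ENOSPC if the requested slot is taken, -ENOMEM on allocation
	 * failure). Passing max == *index pins allocation to one slot. */
	static int example_reserve(u32 *index)
	{
		int err;

		idr_preload(GFP_KERNEL);
		if (*index) {
			err = idr_alloc_u32(&example_idr, NULL, index, *index,
					    GFP_ATOMIC);
		} else {
			*index = 1;
			err = idr_alloc_u32(&example_idr, NULL, index, UINT_MAX,
					    GFP_ATOMIC);
		}
		idr_preload_end();
		return err;
	}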
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5ef8ce8c83d4..b3f2c15affa7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -357,7 +357,7 @@ out:
 	return ret;
 }
 
-static void tcf_bpf_cleanup(struct tc_action *act, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act)
 {
 	struct tcf_bpf_cfg tmp;
 
@@ -401,16 +401,14 @@ static __net_init int bpf_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_bpf_ops);
 }
 
-static void __net_exit bpf_exit_net(struct net *net)
+static void __net_exit bpf_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, bpf_net_id);
 }
 
 static struct pernet_operations bpf_net_ops = {
 	.init = bpf_init_net,
-	.exit = bpf_exit_net,
+	.exit_batch = bpf_exit_net,
 	.id = &bpf_net_id,
 	.size = sizeof(struct tc_action_net),
 };
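This .exit to .exit_batch conversion is repeated verbatim across every action module below (connmark, csum, gact, ife, ipt/xt, mirred, nat, pedit, police, sample, simple, skbedit, skbmod). With .exit_batch, the pernet core hands over all namespaces dying in one cleanup cycle in a single call, so shared work is paid once per batch rather than once per namespace. A minimal sketch of the shape such a handler takes, with illustrative per-net id and teardown helper:

	#include <linux/list.h>
	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	static unsigned int example_net_id;	/* set by register_pernet_subsys() */

	static void example_teardown(void *priv)
	{
		/* per-namespace cleanup would go here */
	}

	/* The core links dying namespaces through net->exit_list. */
	static void __net_exit example_exit_batch(struct list_head *net_list)
	{
		struct net *net;

		list_for_each_entry(net, net_list, exit_list)
			example_teardown(net_generic(net, example_net_id));
	}

Here the batching is assumed to be wrapped by tc_action_net_exit(), whose signature changes accordingly to take the list and the per-net id.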
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 10b7a8855a6c..2b15ba84e0c8 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -209,16 +209,14 @@ static __net_init int connmark_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_connmark_ops);
 }
 
-static void __net_exit connmark_exit_net(struct net *net)
+static void __net_exit connmark_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, connmark_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, connmark_net_id);
 }
 
 static struct pernet_operations connmark_net_ops = {
 	.init = connmark_init_net,
-	.exit = connmark_exit_net,
+	.exit_batch = connmark_exit_net,
 	.id = &connmark_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d836f998117b..b7ba9b06b147 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			  int bind)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
+	struct tcf_csum_params *params_old, *params_new;
 	struct nlattr *tb[TCA_CSUM_MAX + 1];
 	struct tc_csum *parm;
 	struct tcf_csum *p;
@@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 
 	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
-				     &act_csum_ops, bind, false);
+				     &act_csum_ops, bind, true);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
@@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 	}
 
 	p = to_tcf_csum(*a);
-	spin_lock_bh(&p->tcf_lock);
-	p->tcf_action = parm->action;
-	p->update_flags = parm->update_flags;
-	spin_unlock_bh(&p->tcf_lock);
+	ASSERT_RTNL();
+
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		if (ret == ACT_P_CREATED)
+			tcf_idr_release(*a, bind);
+		return -ENOMEM;
+	}
+	params_old = rtnl_dereference(p->params);
+
+	params_new->action = parm->action;
+	params_new->update_flags = parm->update_flags;
+	rcu_assign_pointer(p->params, params_new);
+	if (params_old)
+		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
@@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
 		    struct tcf_result *res)
 {
 	struct tcf_csum *p = to_tcf_csum(a);
-	int action;
+	struct tcf_csum_params *params;
 	u32 update_flags;
+	int action;
+
+	rcu_read_lock();
+	params = rcu_dereference(p->params);
 
-	spin_lock(&p->tcf_lock);
 	tcf_lastuse_update(&p->tcf_tm);
-	bstats_update(&p->tcf_bstats, skb);
-	action = p->tcf_action;
-	update_flags = p->update_flags;
-	spin_unlock(&p->tcf_lock);
+	bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
 
+	action = params->action;
 	if (unlikely(action == TC_ACT_SHOT))
-		goto drop;
+		goto drop_stats;
 
+	update_flags = params->update_flags;
 	switch (tc_skb_protocol(skb)) {
 	case cpu_to_be16(ETH_P_IP):
 		if (!tcf_csum_ipv4(skb, update_flags))
@@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
 		break;
 	}
 
+unlock:
+	rcu_read_unlock();
 	return action;
 
 drop:
-	spin_lock(&p->tcf_lock);
-	p->tcf_qstats.drops++;
-	spin_unlock(&p->tcf_lock);
-	return TC_ACT_SHOT;
+	action = TC_ACT_SHOT;
+
+drop_stats:
+	qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+	goto unlock;
 }
 
 static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_csum *p = to_tcf_csum(a);
+	struct tcf_csum_params *params;
 	struct tc_csum opt = {
-		.update_flags = p->update_flags,
 		.index = p->tcf_index,
-		.action = p->tcf_action,
 		.refcnt = p->tcf_refcnt - ref,
 		.bindcnt = p->tcf_bindcnt - bind,
 	};
 	struct tcf_t t;
 
+	params = rtnl_dereference(p->params);
+	opt.action = params->action;
+	opt.update_flags = params->update_flags;
+
 	if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
@@ -600,6 +620,15 @@ nla_put_failure:
 	return -1;
 }
 
+static void tcf_csum_cleanup(struct tc_action *a)
+{
+	struct tcf_csum *p = to_tcf_csum(a);
+	struct tcf_csum_params *params;
+
+	params = rcu_dereference_protected(p->params, 1);
+	kfree_rcu(params, rcu);
+}
+
 static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
 			   const struct tc_action_ops *ops)
@@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = {
 	.act = tcf_csum,
 	.dump = tcf_csum_dump,
 	.init = tcf_csum_init,
+	.cleanup = tcf_csum_cleanup,
 	.walk = tcf_csum_walker,
 	.lookup = tcf_csum_search,
 	.size = sizeof(struct tcf_csum),
@@ -635,16 +665,14 @@ static __net_init int csum_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_csum_ops);
 }
 
-static void __net_exit csum_exit_net(struct net *net)
+static void __net_exit csum_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, csum_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, csum_net_id);
 }
 
 static struct pernet_operations csum_net_ops = {
 	.init = csum_init_net,
-	.exit = csum_exit_net,
+	.exit_batch = csum_exit_net,
 	.id = &csum_net_id,
 	.size = sizeof(struct tc_action_net),
 };
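The act_csum rework replaces a per-packet spinlock with an RCU-managed parameter block: the control path (under RTNL) publishes a freshly allocated params struct with rcu_assign_pointer() and retires the old one via kfree_rcu(), while the datapath takes a lock-free snapshot under rcu_read_lock(). A self-contained sketch of that publish/read pattern, with illustrative types:

	#include <linux/rcupdate.h>
	#include <linux/rtnetlink.h>
	#include <linux/slab.h>

	struct params {
		int action;
		u32 flags;
		struct rcu_head rcu;	/* for kfree_rcu() */
	};

	struct obj {
		struct params __rcu *params;
	};

	/* Control plane, assumed to run under rtnl_lock(): publish new
	 * params, free the old block after a grace period. */
	static int obj_update(struct obj *o, int action, u32 flags)
	{
		struct params *new, *old;

		new = kzalloc(sizeof(*new), GFP_KERNEL);
		if (!new)
			return -ENOMEM;
		new->action = action;
		new->flags = flags;
		old = rtnl_dereference(o->params);
		rcu_assign_pointer(o->params, new);
		if (old)
			kfree_rcu(old, rcu);
		return 0;
	}

	/* Datapath: lock-free snapshot under the RCU read lock. */
	static int obj_action(struct obj *o)
	{
		struct params *p;
		int action;

		rcu_read_lock();
		p = rcu_dereference(o->params);
		action = p->action;
		rcu_read_unlock();
		return action;
	}

The same rework moves stats to per-CPU counters (bstats_cpu_update, qstats_drop_inc), so the hot path touches no shared cacheline at all.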
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a0ac42b3ed06..b56986d41c87 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -235,16 +235,14 @@ static __net_init int gact_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_gact_ops);
 }
 
-static void __net_exit gact_exit_net(struct net *net)
+static void __net_exit gact_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, gact_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, gact_net_id);
 }
 
 static struct pernet_operations gact_net_ops = {
 	.init = gact_init_net,
-	.exit = gact_exit_net,
+	.exit_batch = gact_exit_net,
 	.id = &gact_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3007cb1310ea..5954e992685a 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -387,7 +387,7 @@ out_nlmsg_trim:
 }
 
 /* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a, int bind)
+static void _tcf_ife_cleanup(struct tc_action *a)
 {
 	struct tcf_ife_info *ife = to_ife(a);
 	struct tcf_meta_info *e, *n;
@@ -405,13 +405,13 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
 	}
 }
 
-static void tcf_ife_cleanup(struct tc_action *a, int bind)
+static void tcf_ife_cleanup(struct tc_action *a)
 {
 	struct tcf_ife_info *ife = to_ife(a);
 	struct tcf_ife_params *p;
 
 	spin_lock_bh(&ife->tcf_lock);
-	_tcf_ife_cleanup(a, bind);
+	_tcf_ife_cleanup(a);
 	spin_unlock_bh(&ife->tcf_lock);
 
 	p = rcu_dereference_protected(ife->params, 1);
@@ -546,7 +546,7 @@ metadata_parse_err:
 		if (exists)
 			tcf_idr_release(*a, bind);
 		if (ret == ACT_P_CREATED)
-			_tcf_ife_cleanup(*a, bind);
+			_tcf_ife_cleanup(*a);
 
 		if (exists)
 			spin_unlock_bh(&ife->tcf_lock);
@@ -567,7 +567,7 @@ metadata_parse_err:
 	err = use_all_metadata(ife);
 	if (err) {
 		if (ret == ACT_P_CREATED)
-			_tcf_ife_cleanup(*a, bind);
+			_tcf_ife_cleanup(*a);
 
 		if (exists)
 			spin_unlock_bh(&ife->tcf_lock);
@@ -858,16 +858,14 @@ static __net_init int ife_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_ife_ops);
 }
 
-static void __net_exit ife_exit_net(struct net *net)
+static void __net_exit ife_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, ife_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, ife_net_id);
 }
 
 static struct pernet_operations ife_net_ops = {
 	.init = ife_init_net,
-	.exit = ife_exit_net,
+	.exit_batch = ife_exit_net,
 	.id = &ife_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d9e399a7e3d5..06e380ae0928 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -77,7 +77,7 @@ static void ipt_destroy_target(struct xt_entry_target *t)
 	module_put(par.target->me);
 }
 
-static void tcf_ipt_release(struct tc_action *a, int bind)
+static void tcf_ipt_release(struct tc_action *a)
 {
 	struct tcf_ipt *ipt = to_ipt(a);
 	ipt_destroy_target(ipt->tcfi_t);
@@ -337,16 +337,14 @@ static __net_init int ipt_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_ipt_ops);
 }
 
-static void __net_exit ipt_exit_net(struct net *net)
+static void __net_exit ipt_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, ipt_net_id);
 }
 
 static struct pernet_operations ipt_net_ops = {
 	.init = ipt_init_net,
-	.exit = ipt_exit_net,
+	.exit_batch = ipt_exit_net,
 	.id = &ipt_net_id,
 	.size = sizeof(struct tc_action_net),
 };
@@ -387,16 +385,14 @@ static __net_init int xt_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_xt_ops);
 }
 
-static void __net_exit xt_exit_net(struct net *net)
+static void __net_exit xt_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, xt_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, xt_net_id);
 }
 
 static struct pernet_operations xt_net_ops = {
 	.init = xt_init_net,
-	.exit = xt_exit_net,
+	.exit_batch = xt_exit_net,
 	.id = &xt_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 08b61849c2a2..e6ff88f72900 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -29,7 +29,6 @@
 #include <net/tc_act/tc_mirred.h>
 
 static LIST_HEAD(mirred_list);
-static DEFINE_SPINLOCK(mirred_list_lock);
 
 static bool tcf_mirred_is_act_redirect(int action)
 {
@@ -50,18 +49,15 @@ static bool tcf_mirred_act_wants_ingress(int action)
 	}
 }
 
-static void tcf_mirred_release(struct tc_action *a, int bind)
+static void tcf_mirred_release(struct tc_action *a)
 {
 	struct tcf_mirred *m = to_mirred(a);
 	struct net_device *dev;
 
-	/* We could be called either in a RCU callback or with RTNL lock held. */
-	spin_lock_bh(&mirred_list_lock);
 	list_del(&m->tcfm_list);
-	dev = rcu_dereference_protected(m->tcfm_dev, 1);
+	dev = rtnl_dereference(m->tcfm_dev);
 	if (dev)
 		dev_put(dev);
-	spin_unlock_bh(&mirred_list_lock);
 }
 
 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -139,8 +135,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	m->tcf_action = parm->action;
 	m->tcfm_eaction = parm->eaction;
 	if (dev != NULL) {
-		m->tcfm_ifindex = parm->ifindex;
-		m->net = net;
 		if (ret != ACT_P_CREATED)
 			dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
 		dev_hold(dev);
@@ -149,9 +143,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED) {
-		spin_lock_bh(&mirred_list_lock);
 		list_add(&m->tcfm_list, &mirred_list);
-		spin_unlock_bh(&mirred_list_lock);
 		tcf_idr_insert(tn, *a);
 	}
 
@@ -247,13 +239,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_mirred *m = to_mirred(a);
+	struct net_device *dev = rtnl_dereference(m->tcfm_dev);
 	struct tc_mirred opt = {
 		.index = m->tcf_index,
 		.action = m->tcf_action,
 		.refcnt = m->tcf_refcnt - ref,
 		.bindcnt = m->tcf_bindcnt - bind,
 		.eaction = m->tcfm_eaction,
-		.ifindex = m->tcfm_ifindex,
+		.ifindex = dev ? dev->ifindex : 0,
 	};
 	struct tcf_t t;
 
@@ -294,7 +287,6 @@ static int mirred_device_event(struct notifier_block *unused,
 
 	ASSERT_RTNL();
 	if (event == NETDEV_UNREGISTER) {
-		spin_lock_bh(&mirred_list_lock);
 		list_for_each_entry(m, &mirred_list, tcfm_list) {
 			if (rcu_access_pointer(m->tcfm_dev) == dev) {
 				dev_put(dev);
@@ -304,7 +296,6 @@ static int mirred_device_event(struct notifier_block *unused,
 				RCU_INIT_POINTER(m->tcfm_dev, NULL);
 			}
 		}
-		spin_unlock_bh(&mirred_list_lock);
 	}
 
 	return NOTIFY_DONE;
@@ -318,7 +309,7 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 {
 	struct tcf_mirred *m = to_mirred(a);
 
-	return __dev_get_by_index(m->net, m->tcfm_ifindex);
+	return rtnl_dereference(m->tcfm_dev);
 }
 
 static struct tc_action_ops act_mirred_ops = {
@@ -343,16 +334,14 @@ static __net_init int mirred_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_mirred_ops);
 }
 
-static void __net_exit mirred_exit_net(struct net *net)
+static void __net_exit mirred_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, mirred_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, mirred_net_id);
 }
 
 static struct pernet_operations mirred_net_ops = {
 	.init = mirred_init_net,
-	.exit = mirred_exit_net,
+	.exit_batch = mirred_exit_net,
 	.id = &mirred_net_id,
 	.size = sizeof(struct tc_action_net),
 };
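The mirred change drops the private mirred_list_lock because every writer of the device pointer and the list now runs under RTNL, so rtnl_dereference() suffices and the cached ifindex/net fields become redundant with the held device. A hedged sketch of that RTNL-protected device-pointer idiom (example_dev and the helper are illustrative):

	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	static struct net_device __rcu *example_dev;

	/* All updaters run under rtnl_lock(), so rtnl_dereference() replaces
	 * a private spinlock; a NETDEV_UNREGISTER notifier (also under RTNL)
	 * can safely clear the pointer with RCU_INIT_POINTER(). */
	static void example_set_dev(struct net_device *dev)
	{
		struct net_device *old;

		ASSERT_RTNL();
		old = rtnl_dereference(example_dev);
		if (old)
			dev_put(old);
		dev_hold(dev);
		rcu_assign_pointer(example_dev, dev);
	}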
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c365d01b99c8..98c6a4b2f523 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -310,16 +310,14 @@ static __net_init int nat_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_nat_ops);
 }
 
-static void __net_exit nat_exit_net(struct net *net)
+static void __net_exit nat_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, nat_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, nat_net_id);
 }
 
 static struct pernet_operations nat_net_ops = {
 	.init = nat_init_net,
-	.exit = nat_exit_net,
+	.exit_batch = nat_exit_net,
 	.id = &nat_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 491fe5deb09e..349beaffb29e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -216,7 +216,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	return ret;
 }
 
-static void tcf_pedit_cleanup(struct tc_action *a, int bind)
+static void tcf_pedit_cleanup(struct tc_action *a)
 {
 	struct tcf_pedit *p = to_pedit(a);
 	struct tc_pedit_key *keys = p->tcfp_keys;
@@ -453,16 +453,14 @@ static __net_init int pedit_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_pedit_ops);
 }
 
-static void __net_exit pedit_exit_net(struct net *net)
+static void __net_exit pedit_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, pedit_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, pedit_net_id);
 }
 
 static struct pernet_operations pedit_net_ops = {
 	.init = pedit_init_net,
-	.exit = pedit_exit_net,
+	.exit_batch = pedit_exit_net,
 	.id = &pedit_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 3bb2ebf9e9ae..95d3c9097b25 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -118,13 +118,13 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 	police = to_police(*a);
 	if (parm->rate.rate) {
 		err = -ENOMEM;
-		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
+		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE], NULL);
 		if (R_tab == NULL)
 			goto failure;
 
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
-					       tb[TCA_POLICE_PEAKRATE]);
+					       tb[TCA_POLICE_PEAKRATE], NULL);
 			if (P_tab == NULL)
 				goto failure;
 		}
@@ -334,16 +334,14 @@ static __net_init int police_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_police_ops);
 }
 
-static void __net_exit police_exit_net(struct net *net)
+static void __net_exit police_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, police_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, police_net_id);
 }
 
 static struct pernet_operations police_net_ops = {
 	.init = police_init_net,
-	.exit = police_exit_net,
+	.exit_batch = police_exit_net,
 	.id = &police_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9438969290a6..1ba0df238756 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,7 +96,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	return ret;
 }
 
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
+static void tcf_sample_cleanup(struct tc_action *a)
 {
 	struct tcf_sample *s = to_sample(a);
 	struct psample_group *psample_group;
@@ -236,16 +236,14 @@ static __net_init int sample_init_net(struct net *net)
 	return tc_action_net_init(tn, &act_sample_ops);
 }
 
-static void __net_exit sample_exit_net(struct net *net)
+static void __net_exit sample_exit_net(struct list_head *net_list)
 {
-	struct tc_action_net *tn = net_generic(net, sample_net_id);
-
-	tc_action_net_exit(tn);
+	tc_action_net_exit(net_list, sample_net_id);
 }
 
 static struct pernet_operations sample_net_ops = {
 	.init = sample_init_net,
-	.exit = sample_exit_net,
+	.exit_batch = sample_exit_net,
 	.id = &sample_net_id,
 	.size = sizeof(struct tc_action_net),
 };
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7b57e5071a3..425eac11f6da 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
47 return d->tcf_action; 47 return d->tcf_action;
48} 48}
49 49
50static void tcf_simp_release(struct tc_action *a, int bind) 50static void tcf_simp_release(struct tc_action *a)
51{ 51{
52 struct tcf_defact *d = to_defact(a); 52 struct tcf_defact *d = to_defact(a);
53 kfree(d->tcfd_defdata); 53 kfree(d->tcfd_defdata);
@@ -204,16 +204,14 @@ static __net_init int simp_init_net(struct net *net)
204 return tc_action_net_init(tn, &act_simp_ops); 204 return tc_action_net_init(tn, &act_simp_ops);
205} 205}
206 206
207static void __net_exit simp_exit_net(struct net *net) 207static void __net_exit simp_exit_net(struct list_head *net_list)
208{ 208{
209 struct tc_action_net *tn = net_generic(net, simp_net_id); 209 tc_action_net_exit(net_list, simp_net_id);
210
211 tc_action_net_exit(tn);
212} 210}
213 211
214static struct pernet_operations simp_net_ops = { 212static struct pernet_operations simp_net_ops = {
215 .init = simp_init_net, 213 .init = simp_init_net,
216 .exit = simp_exit_net, 214 .exit_batch = simp_exit_net,
217 .id = &simp_net_id, 215 .id = &simp_net_id,
218 .size = sizeof(struct tc_action_net), 216 .size = sizeof(struct tc_action_net),
219}; 217};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 59949d61f20d..5a3f691bb545 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -241,16 +241,14 @@ static __net_init int skbedit_init_net(struct net *net)
241 return tc_action_net_init(tn, &act_skbedit_ops); 241 return tc_action_net_init(tn, &act_skbedit_ops);
242} 242}
243 243
244static void __net_exit skbedit_exit_net(struct net *net) 244static void __net_exit skbedit_exit_net(struct list_head *net_list)
245{ 245{
246 struct tc_action_net *tn = net_generic(net, skbedit_net_id); 246 tc_action_net_exit(net_list, skbedit_net_id);
247
248 tc_action_net_exit(tn);
249} 247}
250 248
251static struct pernet_operations skbedit_net_ops = { 249static struct pernet_operations skbedit_net_ops = {
252 .init = skbedit_init_net, 250 .init = skbedit_init_net,
253 .exit = skbedit_exit_net, 251 .exit_batch = skbedit_exit_net,
254 .id = &skbedit_net_id, 252 .id = &skbedit_net_id,
255 .size = sizeof(struct tc_action_net), 253 .size = sizeof(struct tc_action_net),
256}; 254};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index b642ad3d39dd..fa975262dbac 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -184,7 +184,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
184 return ret; 184 return ret;
185} 185}
186 186
187static void tcf_skbmod_cleanup(struct tc_action *a, int bind) 187static void tcf_skbmod_cleanup(struct tc_action *a)
188{ 188{
189 struct tcf_skbmod *d = to_skbmod(a); 189 struct tcf_skbmod *d = to_skbmod(a);
190 struct tcf_skbmod_params *p; 190 struct tcf_skbmod_params *p;
@@ -266,16 +266,14 @@ static __net_init int skbmod_init_net(struct net *net)
266 return tc_action_net_init(tn, &act_skbmod_ops); 266 return tc_action_net_init(tn, &act_skbmod_ops);
267} 267}
268 268
269static void __net_exit skbmod_exit_net(struct net *net) 269static void __net_exit skbmod_exit_net(struct list_head *net_list)
270{ 270{
271 struct tc_action_net *tn = net_generic(net, skbmod_net_id); 271 tc_action_net_exit(net_list, skbmod_net_id);
272
273 tc_action_net_exit(tn);
274} 272}
275 273
276static struct pernet_operations skbmod_net_ops = { 274static struct pernet_operations skbmod_net_ops = {
277 .init = skbmod_init_net, 275 .init = skbmod_init_net,
278 .exit = skbmod_exit_net, 276 .exit_batch = skbmod_exit_net,
279 .id = &skbmod_net_id, 277 .id = &skbmod_net_id,
280 .size = sizeof(struct tc_action_net), 278 .size = sizeof(struct tc_action_net),
281}; 279};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 30c96274c638..0e23aac09ad6 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,7 +201,7 @@ err_out:
201 return ret; 201 return ret;
202} 202}
203 203
204static void tunnel_key_release(struct tc_action *a, int bind) 204static void tunnel_key_release(struct tc_action *a)
205{ 205{
206 struct tcf_tunnel_key *t = to_tunnel_key(a); 206 struct tcf_tunnel_key *t = to_tunnel_key(a);
207 struct tcf_tunnel_key_params *params; 207 struct tcf_tunnel_key_params *params;
@@ -325,16 +325,14 @@ static __net_init int tunnel_key_init_net(struct net *net)
325 return tc_action_net_init(tn, &act_tunnel_key_ops); 325 return tc_action_net_init(tn, &act_tunnel_key_ops);
326} 326}
327 327
328static void __net_exit tunnel_key_exit_net(struct net *net) 328static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
329{ 329{
330 struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); 330 tc_action_net_exit(net_list, tunnel_key_net_id);
331
332 tc_action_net_exit(tn);
333} 331}
334 332
335static struct pernet_operations tunnel_key_net_ops = { 333static struct pernet_operations tunnel_key_net_ops = {
336 .init = tunnel_key_init_net, 334 .init = tunnel_key_init_net,
337 .exit = tunnel_key_exit_net, 335 .exit_batch = tunnel_key_exit_net,
338 .id = &tunnel_key_net_id, 336 .id = &tunnel_key_net_id,
339 .size = sizeof(struct tc_action_net), 337 .size = sizeof(struct tc_action_net),
340}; 338};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 97f717a13ad5..e1a1b3f3983a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -219,7 +219,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
219 return ret; 219 return ret;
220} 220}
221 221
222static void tcf_vlan_cleanup(struct tc_action *a, int bind) 222static void tcf_vlan_cleanup(struct tc_action *a)
223{ 223{
224 struct tcf_vlan *v = to_vlan(a); 224 struct tcf_vlan *v = to_vlan(a);
225 struct tcf_vlan_params *p; 225 struct tcf_vlan_params *p;
@@ -301,16 +301,14 @@ static __net_init int vlan_init_net(struct net *net)
301 return tc_action_net_init(tn, &act_vlan_ops); 301 return tc_action_net_init(tn, &act_vlan_ops);
302} 302}
303 303
304static void __net_exit vlan_exit_net(struct net *net) 304static void __net_exit vlan_exit_net(struct list_head *net_list)
305{ 305{
306 struct tc_action_net *tn = net_generic(net, vlan_net_id); 306 tc_action_net_exit(net_list, vlan_net_id);
307
308 tc_action_net_exit(tn);
309} 307}
310 308
311static struct pernet_operations vlan_net_ops = { 309static struct pernet_operations vlan_net_ops = {
312 .init = vlan_init_net, 310 .init = vlan_init_net,
313 .exit = vlan_exit_net, 311 .exit_batch = vlan_exit_net,
314 .id = &vlan_net_id, 312 .id = &vlan_net_id,
315 .size = sizeof(struct tc_action_net), 313 .size = sizeof(struct tc_action_net),
316}; 314};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b9d63d2246e6..2bc1bc23d42e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -24,6 +24,7 @@
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/idr.h>
27#include <net/net_namespace.h> 28#include <net/net_namespace.h>
28#include <net/sock.h> 29#include <net/sock.h>
29#include <net/netlink.h> 30#include <net/netlink.h>
@@ -121,8 +122,8 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
121} 122}
122 123
123static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, 124static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
124 u32 prio, u32 parent, struct Qdisc *q, 125 u32 prio, struct tcf_chain *chain,
125 struct tcf_chain *chain) 126 struct netlink_ext_ack *extack)
126{ 127{
127 struct tcf_proto *tp; 128 struct tcf_proto *tp;
128 int err; 129 int err;
@@ -148,6 +149,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
148 module_put(tp->ops->owner); 149 module_put(tp->ops->owner);
149 err = -EAGAIN; 150 err = -EAGAIN;
150 } else { 151 } else {
152 NL_SET_ERR_MSG(extack, "TC classifier not found");
151 err = -ENOENT; 153 err = -ENOENT;
152 } 154 }
153 goto errout; 155 goto errout;
@@ -156,8 +158,6 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
156 tp->classify = tp->ops->classify; 158 tp->classify = tp->ops->classify;
157 tp->protocol = protocol; 159 tp->protocol = protocol;
158 tp->prio = prio; 160 tp->prio = prio;
159 tp->classid = parent;
160 tp->q = q;
161 tp->chain = chain; 161 tp->chain = chain;
162 162
163 err = tp->ops->init(tp); 163 err = tp->ops->init(tp);
@@ -172,13 +172,20 @@ errout:
172 return ERR_PTR(err); 172 return ERR_PTR(err);
173} 173}
174 174
175static void tcf_proto_destroy(struct tcf_proto *tp) 175static void tcf_proto_destroy(struct tcf_proto *tp,
176 struct netlink_ext_ack *extack)
176{ 177{
177 tp->ops->destroy(tp); 178 tp->ops->destroy(tp, extack);
178 module_put(tp->ops->owner); 179 module_put(tp->ops->owner);
179 kfree_rcu(tp, rcu); 180 kfree_rcu(tp, rcu);
180} 181}
181 182
183struct tcf_filter_chain_list_item {
184 struct list_head list;
185 tcf_chain_head_change_t *chain_head_change;
186 void *chain_head_change_priv;
187};
188
182static struct tcf_chain *tcf_chain_create(struct tcf_block *block, 189static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
183 u32 chain_index) 190 u32 chain_index)
184{ 191{
@@ -187,6 +194,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
187 chain = kzalloc(sizeof(*chain), GFP_KERNEL); 194 chain = kzalloc(sizeof(*chain), GFP_KERNEL);
188 if (!chain) 195 if (!chain)
189 return NULL; 196 return NULL;
197 INIT_LIST_HEAD(&chain->filter_chain_list);
190 list_add_tail(&chain->list, &block->chain_list); 198 list_add_tail(&chain->list, &block->chain_list);
191 chain->block = block; 199 chain->block = block;
192 chain->index = chain_index; 200 chain->index = chain_index;
@@ -194,12 +202,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
194 return chain; 202 return chain;
195} 203}
196 204
205static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
206 struct tcf_proto *tp_head)
207{
208 if (item->chain_head_change)
209 item->chain_head_change(tp_head, item->chain_head_change_priv);
210}
197static void tcf_chain_head_change(struct tcf_chain *chain, 211static void tcf_chain_head_change(struct tcf_chain *chain,
198 struct tcf_proto *tp_head) 212 struct tcf_proto *tp_head)
199{ 213{
200 if (chain->chain_head_change) 214 struct tcf_filter_chain_list_item *item;
201 chain->chain_head_change(tp_head, 215
202 chain->chain_head_change_priv); 216 list_for_each_entry(item, &chain->filter_chain_list, list)
217 tcf_chain_head_change_item(item, tp_head);
203} 218}
204 219
205static void tcf_chain_flush(struct tcf_chain *chain) 220static void tcf_chain_flush(struct tcf_chain *chain)
@@ -209,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
209 tcf_chain_head_change(chain, NULL); 224 tcf_chain_head_change(chain, NULL);
210 while (tp) { 225 while (tp) {
211 RCU_INIT_POINTER(chain->filter_chain, tp->next); 226 RCU_INIT_POINTER(chain->filter_chain, tp->next);
212 tcf_proto_destroy(tp); 227 tcf_proto_destroy(tp, NULL);
213 tp = rtnl_dereference(chain->filter_chain); 228 tp = rtnl_dereference(chain->filter_chain);
214 tcf_chain_put(chain); 229 tcf_chain_put(chain);
215 } 230 }
@@ -217,8 +232,12 @@ static void tcf_chain_flush(struct tcf_chain *chain)
217 232
218static void tcf_chain_destroy(struct tcf_chain *chain) 233static void tcf_chain_destroy(struct tcf_chain *chain)
219{ 234{
235 struct tcf_block *block = chain->block;
236
220 list_del(&chain->list); 237 list_del(&chain->list);
221 kfree(chain); 238 kfree(chain);
239 if (list_empty(&block->chain_list))
240 kfree(block);
222} 241}
223 242
224static void tcf_chain_hold(struct tcf_chain *chain) 243static void tcf_chain_hold(struct tcf_chain *chain)
@@ -249,62 +268,300 @@ void tcf_chain_put(struct tcf_chain *chain)
249} 268}
250EXPORT_SYMBOL(tcf_chain_put); 269EXPORT_SYMBOL(tcf_chain_put);
251 270
252static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, 271static bool tcf_block_offload_in_use(struct tcf_block *block)
253 struct tcf_block_ext_info *ei, 272{
254 enum tc_block_command command) 273 return block->offloadcnt;
274}
275
276static int tcf_block_offload_cmd(struct tcf_block *block,
277 struct net_device *dev,
278 struct tcf_block_ext_info *ei,
279 enum tc_block_command command)
255{ 280{
256 struct net_device *dev = q->dev_queue->dev;
257 struct tc_block_offload bo = {}; 281 struct tc_block_offload bo = {};
258 282
259 if (!dev->netdev_ops->ndo_setup_tc)
260 return;
261 bo.command = command; 283 bo.command = command;
262 bo.binder_type = ei->binder_type; 284 bo.binder_type = ei->binder_type;
263 bo.block = block; 285 bo.block = block;
264 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); 286 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
265} 287}
266 288
267static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, 289static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
268 struct tcf_block_ext_info *ei) 290 struct tcf_block_ext_info *ei)
269{ 291{
270 tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND); 292 struct net_device *dev = q->dev_queue->dev;
293 int err;
294
295 if (!dev->netdev_ops->ndo_setup_tc)
296 goto no_offload_dev_inc;
297
298 /* If the tc offload feature is disabled and the block we try to bind
299 * to already has some offloaded filters, refuse the bind.
300 */
301 if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
302 return -EOPNOTSUPP;
303
304 err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
305 if (err == -EOPNOTSUPP)
306 goto no_offload_dev_inc;
307 return err;
308
309no_offload_dev_inc:
310 if (tcf_block_offload_in_use(block))
311 return -EOPNOTSUPP;
312 block->nooffloaddevcnt++;
313 return 0;
271} 314}
272 315
273static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, 316static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
274 struct tcf_block_ext_info *ei) 317 struct tcf_block_ext_info *ei)
275{ 318{
276 tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); 319 struct net_device *dev = q->dev_queue->dev;
320 int err;
321
322 if (!dev->netdev_ops->ndo_setup_tc)
323 goto no_offload_dev_dec;
324 err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
325 if (err == -EOPNOTSUPP)
326 goto no_offload_dev_dec;
327 return;
328
329no_offload_dev_dec:
330 WARN_ON(block->nooffloaddevcnt-- == 0);
277} 331}
278 332
279int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, 333static int
280 struct tcf_block_ext_info *ei) 334tcf_chain_head_change_cb_add(struct tcf_chain *chain,
335 struct tcf_block_ext_info *ei,
336 struct netlink_ext_ack *extack)
337{
338 struct tcf_filter_chain_list_item *item;
339
340 item = kmalloc(sizeof(*item), GFP_KERNEL);
341 if (!item) {
342 NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
343 return -ENOMEM;
344 }
345 item->chain_head_change = ei->chain_head_change;
346 item->chain_head_change_priv = ei->chain_head_change_priv;
347 if (chain->filter_chain)
348 tcf_chain_head_change_item(item, chain->filter_chain);
349 list_add(&item->list, &chain->filter_chain_list);
350 return 0;
351}
352
353static void
354tcf_chain_head_change_cb_del(struct tcf_chain *chain,
355 struct tcf_block_ext_info *ei)
356{
357 struct tcf_filter_chain_list_item *item;
358
359 list_for_each_entry(item, &chain->filter_chain_list, list) {
360 if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
361 (item->chain_head_change == ei->chain_head_change &&
362 item->chain_head_change_priv == ei->chain_head_change_priv)) {
363 tcf_chain_head_change_item(item, NULL);
364 list_del(&item->list);
365 kfree(item);
366 return;
367 }
368 }
369 WARN_ON(1);
370}
371
372struct tcf_net {
373 struct idr idr;
374};
375
376static unsigned int tcf_net_id;
377
378static int tcf_block_insert(struct tcf_block *block, struct net *net,
379 u32 block_index, struct netlink_ext_ack *extack)
380{
381 struct tcf_net *tn = net_generic(net, tcf_net_id);
382 int err;
383
384 err = idr_alloc_u32(&tn->idr, block, &block_index, block_index,
385 GFP_KERNEL);
386 if (err)
387 return err;
388 block->index = block_index;
389 return 0;
390}
391
392static void tcf_block_remove(struct tcf_block *block, struct net *net)
393{
394 struct tcf_net *tn = net_generic(net, tcf_net_id);
395
396 idr_remove(&tn->idr, block->index);
397}
398
399static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
400 struct netlink_ext_ack *extack)
281{ 401{
282 struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); 402 struct tcf_block *block;
283 struct tcf_chain *chain; 403 struct tcf_chain *chain;
284 int err; 404 int err;
285 405
286 if (!block) 406 block = kzalloc(sizeof(*block), GFP_KERNEL);
287 return -ENOMEM; 407 if (!block) {
408 NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
409 return ERR_PTR(-ENOMEM);
410 }
288 INIT_LIST_HEAD(&block->chain_list); 411 INIT_LIST_HEAD(&block->chain_list);
289 INIT_LIST_HEAD(&block->cb_list); 412 INIT_LIST_HEAD(&block->cb_list);
413 INIT_LIST_HEAD(&block->owner_list);
290 414
291 /* Create chain 0 by default; it always has to be present. */ 415 /* Create chain 0 by default; it always has to be present. */
292 chain = tcf_chain_create(block, 0); 416 chain = tcf_chain_create(block, 0);
293 if (!chain) { 417 if (!chain) {
418 NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
294 err = -ENOMEM; 419 err = -ENOMEM;
295 goto err_chain_create; 420 goto err_chain_create;
296 } 421 }
297 WARN_ON(!ei->chain_head_change);
298 chain->chain_head_change = ei->chain_head_change;
299 chain->chain_head_change_priv = ei->chain_head_change_priv;
300 block->net = qdisc_net(q); 422 block->net = qdisc_net(q);
423 block->refcnt = 1;
424 block->net = net;
301 block->q = q; 425 block->q = q;
302 tcf_block_offload_bind(block, q, ei); 426 return block;
303 *p_block = block;
304 return 0;
305 427
306err_chain_create: 428err_chain_create:
307 kfree(block); 429 kfree(block);
430 return ERR_PTR(err);
431}
432
433static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
434{
435 struct tcf_net *tn = net_generic(net, tcf_net_id);
436
437 return idr_find(&tn->idr, block_index);
438}
439
440static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
441{
442 return list_first_entry(&block->chain_list, struct tcf_chain, list);
443}
444
445struct tcf_block_owner_item {
446 struct list_head list;
447 struct Qdisc *q;
448 enum tcf_block_binder_type binder_type;
449};
450
451static void
452tcf_block_owner_netif_keep_dst(struct tcf_block *block,
453 struct Qdisc *q,
454 enum tcf_block_binder_type binder_type)
455{
456 if (block->keep_dst &&
457 binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
458 binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
459 netif_keep_dst(qdisc_dev(q));
460}
461
462void tcf_block_netif_keep_dst(struct tcf_block *block)
463{
464 struct tcf_block_owner_item *item;
465
466 block->keep_dst = true;
467 list_for_each_entry(item, &block->owner_list, list)
468 tcf_block_owner_netif_keep_dst(block, item->q,
469 item->binder_type);
470}
471EXPORT_SYMBOL(tcf_block_netif_keep_dst);
472
473static int tcf_block_owner_add(struct tcf_block *block,
474 struct Qdisc *q,
475 enum tcf_block_binder_type binder_type)
476{
477 struct tcf_block_owner_item *item;
478
479 item = kmalloc(sizeof(*item), GFP_KERNEL);
480 if (!item)
481 return -ENOMEM;
482 item->q = q;
483 item->binder_type = binder_type;
484 list_add(&item->list, &block->owner_list);
485 return 0;
486}
487
488static void tcf_block_owner_del(struct tcf_block *block,
489 struct Qdisc *q,
490 enum tcf_block_binder_type binder_type)
491{
492 struct tcf_block_owner_item *item;
493
494 list_for_each_entry(item, &block->owner_list, list) {
495 if (item->q == q && item->binder_type == binder_type) {
496 list_del(&item->list);
497 kfree(item);
498 return;
499 }
500 }
501 WARN_ON(1);
502}
503
504int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
505 struct tcf_block_ext_info *ei,
506 struct netlink_ext_ack *extack)
507{
508 struct net *net = qdisc_net(q);
509 struct tcf_block *block = NULL;
510 bool created = false;
511 int err;
512
513 if (ei->block_index) {
514 /* a non-zero block_index means a shared block is requested */
515 block = tcf_block_lookup(net, ei->block_index);
516 if (block)
517 block->refcnt++;
518 }
519
520 if (!block) {
521 block = tcf_block_create(net, q, extack);
522 if (IS_ERR(block))
523 return PTR_ERR(block);
524 created = true;
525 if (ei->block_index) {
526 err = tcf_block_insert(block, net,
527 ei->block_index, extack);
528 if (err)
529 goto err_block_insert;
530 }
531 }
532
533 err = tcf_block_owner_add(block, q, ei->binder_type);
534 if (err)
535 goto err_block_owner_add;
536
537 tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
538
539 err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
540 ei, extack);
541 if (err)
542 goto err_chain_head_change_cb_add;
543
544 err = tcf_block_offload_bind(block, q, ei);
545 if (err)
546 goto err_block_offload_bind;
547
548 *p_block = block;
549 return 0;
550
551err_block_offload_bind:
552 tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
553err_chain_head_change_cb_add:
554 tcf_block_owner_del(block, q, ei->binder_type);
555err_block_owner_add:
556 if (created) {
557 if (tcf_block_shared(block))
558 tcf_block_remove(block, net);
559err_block_insert:
560 kfree(tcf_block_chain_zero(block));
561 kfree(block);
562 } else {
563 block->refcnt--;
564 }
308 return err; 565 return err;
309} 566}
310EXPORT_SYMBOL(tcf_block_get_ext); 567EXPORT_SYMBOL(tcf_block_get_ext);
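
The reworked tcf_block_get_ext() above is a lookup-or-create: a non-zero
ei->block_index requests a shared block, which is first looked up in the
per-netns registry and reference counted; only if absent is a fresh block
created and, when shared, inserted into the IDR. A condensed sketch of the
control flow, error unwinding elided (names as in the hunk above):

	block = ei->block_index ? tcf_block_lookup(net, ei->block_index) : NULL;
	if (block) {
		block->refcnt++;		/* another user of a shared block */
	} else {
		block = tcf_block_create(net, q, extack);
		if (ei->block_index)		/* shared: make it findable */
			err = tcf_block_insert(block, net, ei->block_index, extack);
	}

Each user then records an owner item, adds its chain-0 head-change
callback and binds offloads, with every step unwound on failure.
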
@@ -317,7 +574,8 @@ static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
317} 574}
318 575
319int tcf_block_get(struct tcf_block **p_block, 576int tcf_block_get(struct tcf_block **p_block,
320 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) 577 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
578 struct netlink_ext_ack *extack)
321{ 579{
322 struct tcf_block_ext_info ei = { 580 struct tcf_block_ext_info ei = {
323 .chain_head_change = tcf_chain_head_change_dflt, 581 .chain_head_change = tcf_chain_head_change_dflt,
@@ -325,53 +583,47 @@ int tcf_block_get(struct tcf_block **p_block,
325 }; 583 };
326 584
327 WARN_ON(!p_filter_chain); 585 WARN_ON(!p_filter_chain);
328 return tcf_block_get_ext(p_block, q, &ei); 586 return tcf_block_get_ext(p_block, q, &ei, extack);
329} 587}
330EXPORT_SYMBOL(tcf_block_get); 588EXPORT_SYMBOL(tcf_block_get);
331 589
332static void tcf_block_put_final(struct work_struct *work)
333{
334 struct tcf_block *block = container_of(work, struct tcf_block, work);
335 struct tcf_chain *chain, *tmp;
336
337 rtnl_lock();
338
339 /* At this point, all the chains should have refcnt == 1. */
340 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
341 tcf_chain_put(chain);
342 rtnl_unlock();
343 kfree(block);
344}
345
346/* XXX: Standalone actions are not allowed to jump to any chain, and bound 590/* XXX: Standalone actions are not allowed to jump to any chain, and bound
347 * actions should be all removed after flushing. 591 * actions should be all removed after flushing.
348 */ 592 */
349void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, 593void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
350 struct tcf_block_ext_info *ei) 594 struct tcf_block_ext_info *ei)
351{ 595{
352 struct tcf_chain *chain; 596 struct tcf_chain *chain, *tmp;
353 597
354 if (!block) 598 if (!block)
355 return; 599 return;
356 /* Hold a refcnt for all chains, except 0, so that they don't disappear 600 tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
357 * while we are iterating. 601 tcf_block_owner_del(block, q, ei->binder_type);
358 */ 602
359 list_for_each_entry(chain, &block->chain_list, list) 603 if (--block->refcnt == 0) {
360 if (chain->index) 604 if (tcf_block_shared(block))
605 tcf_block_remove(block, block->net);
606
607 /* Hold a refcnt for all chains, so that they don't disappear
608 * while we are iterating.
609 */
610 list_for_each_entry(chain, &block->chain_list, list)
361 tcf_chain_hold(chain); 611 tcf_chain_hold(chain);
362 612
363 list_for_each_entry(chain, &block->chain_list, list) 613 list_for_each_entry(chain, &block->chain_list, list)
364 tcf_chain_flush(chain); 614 tcf_chain_flush(chain);
615 }
365 616
366 tcf_block_offload_unbind(block, q, ei); 617 tcf_block_offload_unbind(block, q, ei);
367 618
368 INIT_WORK(&block->work, tcf_block_put_final); 619 if (block->refcnt == 0) {
369 /* Wait for existing RCU callbacks to cool down, make sure their works 620 /* At this point, all the chains should have refcnt >= 1. */
370 * have been queued before this. We can not flush pending works here 621 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
371 * because we are holding the RTNL lock. 622 tcf_chain_put(chain);
372 */ 623
373 rcu_barrier(); 624 /* Finally, put chain 0 and allow block to be freed. */
374 tcf_queue_work(&block->work); 625 tcf_chain_put(tcf_block_chain_zero(block));
626 }
375} 627}
376EXPORT_SYMBOL(tcf_block_put_ext); 628EXPORT_SYMBOL(tcf_block_put_ext);
377 629
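
tcf_block_put_ext() above is the mirror image: every user removes its
head-change callback and owner item, but only the final reference flushes
the chains. The freeing is indirect; tcf_chain_destroy() now kfree()s the
block once chain_list empties, so putting chain 0 last is what actually
releases a dying block, replacing the old rcu_barrier()/workqueue final
free. A condensed sketch of the tail of the put path:

	if (--block->refcnt == 0) {
		/* flush all chains, then drop chain 0; destroying the
		 * last chain frees the block itself */
		tcf_chain_put(tcf_block_chain_zero(block));
	}
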
@@ -429,9 +681,16 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
429{ 681{
430 struct tcf_block_cb *block_cb; 682 struct tcf_block_cb *block_cb;
431 683
684 /* At this point, playback of previous block cb calls is not supported,
685 * so refuse to register a callback on a block which already has some
686 * offloaded filters present.
687 */
688 if (tcf_block_offload_in_use(block))
689 return ERR_PTR(-EOPNOTSUPP);
690
432 block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); 691 block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
433 if (!block_cb) 692 if (!block_cb)
434 return NULL; 693 return ERR_PTR(-ENOMEM);
435 block_cb->cb = cb; 694 block_cb->cb = cb;
436 block_cb->cb_ident = cb_ident; 695 block_cb->cb_ident = cb_ident;
437 block_cb->cb_priv = cb_priv; 696 block_cb->cb_priv = cb_priv;
@@ -447,7 +706,7 @@ int tcf_block_cb_register(struct tcf_block *block,
447 struct tcf_block_cb *block_cb; 706 struct tcf_block_cb *block_cb;
448 707
449 block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv); 708 block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
450 return block_cb ? 0 : -ENOMEM; 709 return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
451} 710}
452EXPORT_SYMBOL(tcf_block_cb_register); 711EXPORT_SYMBOL(tcf_block_cb_register);
453 712
@@ -477,6 +736,10 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
477 int ok_count = 0; 736 int ok_count = 0;
478 int err; 737 int err;
479 738
739 /* Make sure all netdevs sharing this block are offload-capable. */
740 if (block->nooffloaddevcnt && err_stop)
741 return -EOPNOTSUPP;
742
480 list_for_each_entry(block_cb, &block->cb_list, list) { 743 list_for_each_entry(block_cb, &block->cb_list, list) {
481 err = block_cb->cb(type, type_data, block_cb->cb_priv); 744 err = block_cb->cb(type, type_data, block_cb->cb_priv);
482 if (err) { 745 if (err) {
@@ -530,8 +793,9 @@ reclassify:
530#ifdef CONFIG_NET_CLS_ACT 793#ifdef CONFIG_NET_CLS_ACT
531reset: 794reset:
532 if (unlikely(limit++ >= max_reclassify_loop)) { 795 if (unlikely(limit++ >= max_reclassify_loop)) {
533 net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", 796 net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
534 tp->q->ops->id, tp->prio & 0xffff, 797 tp->chain->block->index,
798 tp->prio & 0xffff,
535 ntohs(tp->protocol)); 799 ntohs(tp->protocol));
536 return TC_ACT_SHOT; 800 return TC_ACT_SHOT;
537 } 801 }
@@ -604,8 +868,9 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
604} 868}
605 869
606static int tcf_fill_node(struct net *net, struct sk_buff *skb, 870static int tcf_fill_node(struct net *net, struct sk_buff *skb,
607 struct tcf_proto *tp, struct Qdisc *q, u32 parent, 871 struct tcf_proto *tp, struct tcf_block *block,
608 void *fh, u32 portid, u32 seq, u16 flags, int event) 872 struct Qdisc *q, u32 parent, void *fh,
873 u32 portid, u32 seq, u16 flags, int event)
609{ 874{
610 struct tcmsg *tcm; 875 struct tcmsg *tcm;
611 struct nlmsghdr *nlh; 876 struct nlmsghdr *nlh;
@@ -618,8 +883,13 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
618 tcm->tcm_family = AF_UNSPEC; 883 tcm->tcm_family = AF_UNSPEC;
619 tcm->tcm__pad1 = 0; 884 tcm->tcm__pad1 = 0;
620 tcm->tcm__pad2 = 0; 885 tcm->tcm__pad2 = 0;
621 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 886 if (q) {
622 tcm->tcm_parent = parent; 887 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
888 tcm->tcm_parent = parent;
889 } else {
890 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
891 tcm->tcm_block_index = block->index;
892 }
623 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 893 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
624 if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) 894 if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
625 goto nla_put_failure; 895 goto nla_put_failure;
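
Shared blocks may have no qdisc to report, so the fill path above
overloads tcm_ifindex: a real ifindex means classic dev/qdisc/class
addressing, while the TCM_IFINDEX_MAGIC_BLOCK sentinel switches the
message to block addressing, with tcm_block_index carrying the block ID.
The consumer side mirrors this; a sketch (the full version is in the
tc_ctl_tfilter and tc_dump_tfilter hunks below):

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_lookup(net, tcm->tcm_block_index);
	} else {
		/* resolve tcm_ifindex -> dev -> qdisc -> class -> block */
	}
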
@@ -642,8 +912,8 @@ nla_put_failure:
642 912
643static int tfilter_notify(struct net *net, struct sk_buff *oskb, 913static int tfilter_notify(struct net *net, struct sk_buff *oskb,
644 struct nlmsghdr *n, struct tcf_proto *tp, 914 struct nlmsghdr *n, struct tcf_proto *tp,
645 struct Qdisc *q, u32 parent, 915 struct tcf_block *block, struct Qdisc *q,
646 void *fh, int event, bool unicast) 916 u32 parent, void *fh, int event, bool unicast)
647{ 917{
648 struct sk_buff *skb; 918 struct sk_buff *skb;
649 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 919 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -652,8 +922,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
652 if (!skb) 922 if (!skb)
653 return -ENOBUFS; 923 return -ENOBUFS;
654 924
655 if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, 925 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
656 n->nlmsg_flags, event) <= 0) { 926 n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
657 kfree_skb(skb); 927 kfree_skb(skb);
658 return -EINVAL; 928 return -EINVAL;
659 } 929 }
@@ -667,8 +937,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
667 937
668static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, 938static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
669 struct nlmsghdr *n, struct tcf_proto *tp, 939 struct nlmsghdr *n, struct tcf_proto *tp,
670 struct Qdisc *q, u32 parent, 940 struct tcf_block *block, struct Qdisc *q,
671 void *fh, bool unicast, bool *last) 941 u32 parent, void *fh, bool unicast, bool *last,
942 struct netlink_ext_ack *extack)
672{ 943{
673 struct sk_buff *skb; 944 struct sk_buff *skb;
674 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 945 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -678,13 +949,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
678 if (!skb) 949 if (!skb)
679 return -ENOBUFS; 950 return -ENOBUFS;
680 951
681 if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, 952 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
682 n->nlmsg_flags, RTM_DELTFILTER) <= 0) { 953 n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
954 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
683 kfree_skb(skb); 955 kfree_skb(skb);
684 return -EINVAL; 956 return -EINVAL;
685 } 957 }
686 958
687 err = tp->ops->delete(tp, fh, last); 959 err = tp->ops->delete(tp, fh, last, extack);
688 if (err) { 960 if (err) {
689 kfree_skb(skb); 961 kfree_skb(skb);
690 return err; 962 return err;
@@ -693,20 +965,24 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
693 if (unicast) 965 if (unicast)
694 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); 966 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
695 967
696 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 968 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
697 n->nlmsg_flags & NLM_F_ECHO); 969 n->nlmsg_flags & NLM_F_ECHO);
970 if (err < 0)
971 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
972 return err;
698} 973}
699 974
700static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, 975static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
701 struct Qdisc *q, u32 parent, 976 struct tcf_block *block, struct Qdisc *q,
702 struct nlmsghdr *n, 977 u32 parent, struct nlmsghdr *n,
703 struct tcf_chain *chain, int event) 978 struct tcf_chain *chain, int event)
704{ 979{
705 struct tcf_proto *tp; 980 struct tcf_proto *tp;
706 981
707 for (tp = rtnl_dereference(chain->filter_chain); 982 for (tp = rtnl_dereference(chain->filter_chain);
708 tp; tp = rtnl_dereference(tp->next)) 983 tp; tp = rtnl_dereference(tp->next))
709 tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false); 984 tfilter_notify(net, oskb, n, tp, block,
985 q, parent, 0, event, false);
710} 986}
711 987
712/* Add/change/delete/get a filter node */ 988/* Add/change/delete/get a filter node */
@@ -722,13 +998,11 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
722 bool prio_allocate; 998 bool prio_allocate;
723 u32 parent; 999 u32 parent;
724 u32 chain_index; 1000 u32 chain_index;
725 struct net_device *dev; 1001 struct Qdisc *q = NULL;
726 struct Qdisc *q;
727 struct tcf_chain_info chain_info; 1002 struct tcf_chain_info chain_info;
728 struct tcf_chain *chain = NULL; 1003 struct tcf_chain *chain = NULL;
729 struct tcf_block *block; 1004 struct tcf_block *block;
730 struct tcf_proto *tp; 1005 struct tcf_proto *tp;
731 const struct Qdisc_class_ops *cops;
732 unsigned long cl; 1006 unsigned long cl;
733 void *fh; 1007 void *fh;
734 int err; 1008 int err;
@@ -755,8 +1029,10 @@ replay:
755 if (prio == 0) { 1029 if (prio == 0) {
756 switch (n->nlmsg_type) { 1030 switch (n->nlmsg_type) {
757 case RTM_DELTFILTER: 1031 case RTM_DELTFILTER:
758 if (protocol || t->tcm_handle || tca[TCA_KIND]) 1032 if (protocol || t->tcm_handle || tca[TCA_KIND]) {
1033 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
759 return -ENOENT; 1034 return -ENOENT;
1035 }
760 break; 1036 break;
761 case RTM_NEWTFILTER: 1037 case RTM_NEWTFILTER:
762 /* If no priority is provided by the user, 1038 /* If no priority is provided by the user,
@@ -769,63 +1045,91 @@ replay:
769 } 1045 }
770 /* fall-through */ 1046 /* fall-through */
771 default: 1047 default:
1048 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
772 return -ENOENT; 1049 return -ENOENT;
773 } 1050 }
774 } 1051 }
775 1052
776 /* Find head of filter chain. */ 1053 /* Find head of filter chain. */
777 1054
778 /* Find link */ 1055 if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
779 dev = __dev_get_by_index(net, t->tcm_ifindex); 1056 block = tcf_block_lookup(net, t->tcm_block_index);
780 if (dev == NULL) 1057 if (!block) {
781 return -ENODEV; 1058 NL_SET_ERR_MSG(extack, "Block of given index was not found");
782 1059 err = -EINVAL;
783 /* Find qdisc */ 1060 goto errout;
784 if (!parent) { 1061 }
785 q = dev->qdisc;
786 parent = q->handle;
787 } else { 1062 } else {
788 q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); 1063 const struct Qdisc_class_ops *cops;
789 if (q == NULL) 1064 struct net_device *dev;
790 return -EINVAL;
791 }
792 1065
793 /* Is it classful? */ 1066 /* Find link */
794 cops = q->ops->cl_ops; 1067 dev = __dev_get_by_index(net, t->tcm_ifindex);
795 if (!cops) 1068 if (!dev)
796 return -EINVAL; 1069 return -ENODEV;
797 1070
798 if (!cops->tcf_block) 1071 /* Find qdisc */
799 return -EOPNOTSUPP; 1072 if (!parent) {
1073 q = dev->qdisc;
1074 parent = q->handle;
1075 } else {
1076 q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
1077 if (!q) {
1078 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1079 return -EINVAL;
1080 }
1081 }
800 1082
801 /* Do we search for filter, attached to class? */ 1083 /* Is it classful? */
802 if (TC_H_MIN(parent)) { 1084 cops = q->ops->cl_ops;
803 cl = cops->find(q, parent); 1085 if (!cops) {
804 if (cl == 0) 1086 NL_SET_ERR_MSG(extack, "Qdisc not classful");
805 return -ENOENT; 1087 return -EINVAL;
806 } 1088 }
807 1089
808 /* And the last stroke */ 1090 if (!cops->tcf_block) {
809 block = cops->tcf_block(q, cl); 1091 NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
810 if (!block) { 1092 return -EOPNOTSUPP;
811 err = -EINVAL; 1093 }
812 goto errout; 1094
1095 /* Do we search for filter, attached to class? */
1096 if (TC_H_MIN(parent)) {
1097 cl = cops->find(q, parent);
1098 if (cl == 0) {
1099 NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
1100 return -ENOENT;
1101 }
1102 }
1103
1104 /* And the last stroke */
1105 block = cops->tcf_block(q, cl, extack);
1106 if (!block) {
1107 err = -EINVAL;
1108 goto errout;
1109 }
1110 if (tcf_block_shared(block)) {
1111 NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
1112 err = -EOPNOTSUPP;
1113 goto errout;
1114 }
813 } 1115 }
814 1116
815 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 1117 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
816 if (chain_index > TC_ACT_EXT_VAL_MASK) { 1118 if (chain_index > TC_ACT_EXT_VAL_MASK) {
1119 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
817 err = -EINVAL; 1120 err = -EINVAL;
818 goto errout; 1121 goto errout;
819 } 1122 }
820 chain = tcf_chain_get(block, chain_index, 1123 chain = tcf_chain_get(block, chain_index,
821 n->nlmsg_type == RTM_NEWTFILTER); 1124 n->nlmsg_type == RTM_NEWTFILTER);
822 if (!chain) { 1125 if (!chain) {
1126 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
823 err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL; 1127 err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
824 goto errout; 1128 goto errout;
825 } 1129 }
826 1130
827 if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { 1131 if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
828 tfilter_notify_chain(net, skb, q, parent, n, 1132 tfilter_notify_chain(net, skb, block, q, parent, n,
829 chain, RTM_DELTFILTER); 1133 chain, RTM_DELTFILTER);
830 tcf_chain_flush(chain); 1134 tcf_chain_flush(chain);
831 err = 0; 1135 err = 0;
@@ -835,6 +1139,7 @@ replay:
835 tp = tcf_chain_tp_find(chain, &chain_info, protocol, 1139 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
836 prio, prio_allocate); 1140 prio, prio_allocate);
837 if (IS_ERR(tp)) { 1141 if (IS_ERR(tp)) {
1142 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
838 err = PTR_ERR(tp); 1143 err = PTR_ERR(tp);
839 goto errout; 1144 goto errout;
840 } 1145 }
@@ -843,12 +1148,14 @@ replay:
843 /* Proto-tcf does not exist, create new one */ 1148 /* Proto-tcf does not exist, create new one */
844 1149
845 if (tca[TCA_KIND] == NULL || !protocol) { 1150 if (tca[TCA_KIND] == NULL || !protocol) {
1151 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
846 err = -EINVAL; 1152 err = -EINVAL;
847 goto errout; 1153 goto errout;
848 } 1154 }
849 1155
850 if (n->nlmsg_type != RTM_NEWTFILTER || 1156 if (n->nlmsg_type != RTM_NEWTFILTER ||
851 !(n->nlmsg_flags & NLM_F_CREATE)) { 1157 !(n->nlmsg_flags & NLM_F_CREATE)) {
1158 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
852 err = -ENOENT; 1159 err = -ENOENT;
853 goto errout; 1160 goto errout;
854 } 1161 }
@@ -857,13 +1164,14 @@ replay:
857 prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); 1164 prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
858 1165
859 tp = tcf_proto_create(nla_data(tca[TCA_KIND]), 1166 tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
860 protocol, prio, parent, q, chain); 1167 protocol, prio, chain, extack);
861 if (IS_ERR(tp)) { 1168 if (IS_ERR(tp)) {
862 err = PTR_ERR(tp); 1169 err = PTR_ERR(tp);
863 goto errout; 1170 goto errout;
864 } 1171 }
865 tp_created = 1; 1172 tp_created = 1;
866 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { 1173 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1174 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
867 err = -EINVAL; 1175 err = -EINVAL;
868 goto errout; 1176 goto errout;
869 } 1177 }
@@ -873,15 +1181,16 @@ replay:
873 if (!fh) { 1181 if (!fh) {
874 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { 1182 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
875 tcf_chain_tp_remove(chain, &chain_info, tp); 1183 tcf_chain_tp_remove(chain, &chain_info, tp);
876 tfilter_notify(net, skb, n, tp, q, parent, fh, 1184 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
877 RTM_DELTFILTER, false); 1185 RTM_DELTFILTER, false);
878 tcf_proto_destroy(tp); 1186 tcf_proto_destroy(tp, extack);
879 err = 0; 1187 err = 0;
880 goto errout; 1188 goto errout;
881 } 1189 }
882 1190
883 if (n->nlmsg_type != RTM_NEWTFILTER || 1191 if (n->nlmsg_type != RTM_NEWTFILTER ||
884 !(n->nlmsg_flags & NLM_F_CREATE)) { 1192 !(n->nlmsg_flags & NLM_F_CREATE)) {
1193 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
885 err = -ENOENT; 1194 err = -ENOENT;
886 goto errout; 1195 goto errout;
887 } 1196 }
@@ -892,41 +1201,47 @@ replay:
892 case RTM_NEWTFILTER: 1201 case RTM_NEWTFILTER:
893 if (n->nlmsg_flags & NLM_F_EXCL) { 1202 if (n->nlmsg_flags & NLM_F_EXCL) {
894 if (tp_created) 1203 if (tp_created)
895 tcf_proto_destroy(tp); 1204 tcf_proto_destroy(tp, NULL);
1205 NL_SET_ERR_MSG(extack, "Filter already exists");
896 err = -EEXIST; 1206 err = -EEXIST;
897 goto errout; 1207 goto errout;
898 } 1208 }
899 break; 1209 break;
900 case RTM_DELTFILTER: 1210 case RTM_DELTFILTER:
901 err = tfilter_del_notify(net, skb, n, tp, q, parent, 1211 err = tfilter_del_notify(net, skb, n, tp, block,
902 fh, false, &last); 1212 q, parent, fh, false, &last,
1213 extack);
903 if (err) 1214 if (err)
904 goto errout; 1215 goto errout;
905 if (last) { 1216 if (last) {
906 tcf_chain_tp_remove(chain, &chain_info, tp); 1217 tcf_chain_tp_remove(chain, &chain_info, tp);
907 tcf_proto_destroy(tp); 1218 tcf_proto_destroy(tp, extack);
908 } 1219 }
909 goto errout; 1220 goto errout;
910 case RTM_GETTFILTER: 1221 case RTM_GETTFILTER:
911 err = tfilter_notify(net, skb, n, tp, q, parent, fh, 1222 err = tfilter_notify(net, skb, n, tp, block, q, parent,
912 RTM_NEWTFILTER, true); 1223 fh, RTM_NEWTFILTER, true);
1224 if (err < 0)
1225 NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
913 goto errout; 1226 goto errout;
914 default: 1227 default:
1228 NL_SET_ERR_MSG(extack, "Invalid netlink message type");
915 err = -EINVAL; 1229 err = -EINVAL;
916 goto errout; 1230 goto errout;
917 } 1231 }
918 } 1232 }
919 1233
920 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, 1234 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
921 n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); 1235 n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
1236 extack);
922 if (err == 0) { 1237 if (err == 0) {
923 if (tp_created) 1238 if (tp_created)
924 tcf_chain_tp_insert(chain, &chain_info, tp); 1239 tcf_chain_tp_insert(chain, &chain_info, tp);
925 tfilter_notify(net, skb, n, tp, q, parent, fh, 1240 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
926 RTM_NEWTFILTER, false); 1241 RTM_NEWTFILTER, false);
927 } else { 1242 } else {
928 if (tp_created) 1243 if (tp_created)
929 tcf_proto_destroy(tp); 1244 tcf_proto_destroy(tp, NULL);
930 } 1245 }
931 1246
932errout: 1247errout:
@@ -942,6 +1257,7 @@ struct tcf_dump_args {
942 struct tcf_walker w; 1257 struct tcf_walker w;
943 struct sk_buff *skb; 1258 struct sk_buff *skb;
944 struct netlink_callback *cb; 1259 struct netlink_callback *cb;
1260 struct tcf_block *block;
945 struct Qdisc *q; 1261 struct Qdisc *q;
946 u32 parent; 1262 u32 parent;
947}; 1263};
@@ -951,7 +1267,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
951 struct tcf_dump_args *a = (void *)arg; 1267 struct tcf_dump_args *a = (void *)arg;
952 struct net *net = sock_net(a->skb->sk); 1268 struct net *net = sock_net(a->skb->sk);
953 1269
954 return tcf_fill_node(net, a->skb, tp, a->q, a->parent, 1270 return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
955 n, NETLINK_CB(a->cb->skb).portid, 1271 n, NETLINK_CB(a->cb->skb).portid,
956 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 1272 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
957 RTM_NEWTFILTER); 1273 RTM_NEWTFILTER);
@@ -962,6 +1278,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
962 long index_start, long *p_index) 1278 long index_start, long *p_index)
963{ 1279{
964 struct net *net = sock_net(skb->sk); 1280 struct net *net = sock_net(skb->sk);
1281 struct tcf_block *block = chain->block;
965 struct tcmsg *tcm = nlmsg_data(cb->nlh); 1282 struct tcmsg *tcm = nlmsg_data(cb->nlh);
966 struct tcf_dump_args arg; 1283 struct tcf_dump_args arg;
967 struct tcf_proto *tp; 1284 struct tcf_proto *tp;
@@ -980,7 +1297,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
980 memset(&cb->args[1], 0, 1297 memset(&cb->args[1], 0,
981 sizeof(cb->args) - sizeof(cb->args[0])); 1298 sizeof(cb->args) - sizeof(cb->args[0]));
982 if (cb->args[1] == 0) { 1299 if (cb->args[1] == 0) {
983 if (tcf_fill_node(net, skb, tp, q, parent, 0, 1300 if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
984 NETLINK_CB(cb->skb).portid, 1301 NETLINK_CB(cb->skb).portid,
985 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1302 cb->nlh->nlmsg_seq, NLM_F_MULTI,
986 RTM_NEWTFILTER) <= 0) 1303 RTM_NEWTFILTER) <= 0)
@@ -993,6 +1310,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
993 arg.w.fn = tcf_node_dump; 1310 arg.w.fn = tcf_node_dump;
994 arg.skb = skb; 1311 arg.skb = skb;
995 arg.cb = cb; 1312 arg.cb = cb;
1313 arg.block = block;
996 arg.q = q; 1314 arg.q = q;
997 arg.parent = parent; 1315 arg.parent = parent;
998 arg.w.stop = 0; 1316 arg.w.stop = 0;
@@ -1011,13 +1329,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
1011{ 1329{
1012 struct net *net = sock_net(skb->sk); 1330 struct net *net = sock_net(skb->sk);
1013 struct nlattr *tca[TCA_MAX + 1]; 1331 struct nlattr *tca[TCA_MAX + 1];
1014 struct net_device *dev; 1332 struct Qdisc *q = NULL;
1015 struct Qdisc *q;
1016 struct tcf_block *block; 1333 struct tcf_block *block;
1017 struct tcf_chain *chain; 1334 struct tcf_chain *chain;
1018 struct tcmsg *tcm = nlmsg_data(cb->nlh); 1335 struct tcmsg *tcm = nlmsg_data(cb->nlh);
1019 unsigned long cl = 0;
1020 const struct Qdisc_class_ops *cops;
1021 long index_start; 1336 long index_start;
1022 long index; 1337 long index;
1023 u32 parent; 1338 u32 parent;
@@ -1030,32 +1345,51 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
1030 if (err) 1345 if (err)
1031 return err; 1346 return err;
1032 1347
1033 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1348 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1034 if (!dev) 1349 block = tcf_block_lookup(net, tcm->tcm_block_index);
1035 return skb->len; 1350 if (!block)
1036 1351 goto out;
1037 parent = tcm->tcm_parent; 1352 /* If we work with block index, q is NULL and parent value
1038 if (!parent) { 1353 * will never be used in the following code. The check
1039 q = dev->qdisc; 1354 * in tcf_fill_node prevents it. However, the compiler does not
1040 parent = q->handle; 1355 * see that far, so set parent to zero to silence the warning
1356 * about parent being uninitialized.
1357 */
1358 parent = 0;
1041 } else { 1359 } else {
1042 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 1360 const struct Qdisc_class_ops *cops;
1043 } 1361 struct net_device *dev;
1044 if (!q) 1362 unsigned long cl = 0;
1045 goto out; 1363
1046 cops = q->ops->cl_ops; 1364 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1047 if (!cops) 1365 if (!dev)
1048 goto out; 1366 return skb->len;
1049 if (!cops->tcf_block) 1367
1050 goto out; 1368 parent = tcm->tcm_parent;
1051 if (TC_H_MIN(tcm->tcm_parent)) { 1369 if (!parent) {
1052 cl = cops->find(q, tcm->tcm_parent); 1370 q = dev->qdisc;
1053 if (cl == 0) 1371 parent = q->handle;
1372 } else {
1373 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1374 }
1375 if (!q)
1376 goto out;
1377 cops = q->ops->cl_ops;
1378 if (!cops)
1379 goto out;
1380 if (!cops->tcf_block)
1381 goto out;
1382 if (TC_H_MIN(tcm->tcm_parent)) {
1383 cl = cops->find(q, tcm->tcm_parent);
1384 if (cl == 0)
1385 goto out;
1386 }
1387 block = cops->tcf_block(q, cl, NULL);
1388 if (!block)
1054 goto out; 1389 goto out;
1390 if (tcf_block_shared(block))
1391 q = NULL;
1055 } 1392 }
1056 block = cops->tcf_block(q, cl);
1057 if (!block)
1058 goto out;
1059 1393
1060 index_start = cb->args[0]; 1394 index_start = cb->args[0];
1061 index = 0; 1395 index = 0;
@@ -1090,7 +1424,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
1090EXPORT_SYMBOL(tcf_exts_destroy); 1424EXPORT_SYMBOL(tcf_exts_destroy);
1091 1425
1092int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, 1426int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
1093 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) 1427 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
1428 struct netlink_ext_ack *extack)
1094{ 1429{
1095#ifdef CONFIG_NET_CLS_ACT 1430#ifdef CONFIG_NET_CLS_ACT
1096 { 1431 {
@@ -1123,8 +1458,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
1123 } 1458 }
1124#else 1459#else
1125 if ((exts->action && tb[exts->action]) || 1460 if ((exts->action && tb[exts->action]) ||
1126 (exts->police && tb[exts->police])) 1461 (exts->police && tb[exts->police])) {
1462 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
1127 return -EOPNOTSUPP; 1463 return -EOPNOTSUPP;
1464 }
1128#endif 1465#endif
1129 1466
1130 return 0; 1467 return 0;
@@ -1258,18 +1595,50 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
1258} 1595}
1259EXPORT_SYMBOL(tc_setup_cb_call); 1596EXPORT_SYMBOL(tc_setup_cb_call);
1260 1597
1598static __net_init int tcf_net_init(struct net *net)
1599{
1600 struct tcf_net *tn = net_generic(net, tcf_net_id);
1601
1602 idr_init(&tn->idr);
1603 return 0;
1604}
1605
1606static void __net_exit tcf_net_exit(struct net *net)
1607{
1608 struct tcf_net *tn = net_generic(net, tcf_net_id);
1609
1610 idr_destroy(&tn->idr);
1611}
1612
1613static struct pernet_operations tcf_net_ops = {
1614 .init = tcf_net_init,
1615 .exit = tcf_net_exit,
1616 .id = &tcf_net_id,
1617 .size = sizeof(struct tcf_net),
1618};
1619
1261static int __init tc_filter_init(void) 1620static int __init tc_filter_init(void)
1262{ 1621{
1622 int err;
1623
1263 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); 1624 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
1264 if (!tc_filter_wq) 1625 if (!tc_filter_wq)
1265 return -ENOMEM; 1626 return -ENOMEM;
1266 1627
1628 err = register_pernet_subsys(&tcf_net_ops);
1629 if (err)
1630 goto err_register_pernet_subsys;
1631
1267 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); 1632 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
1268 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); 1633 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
1269 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, 1634 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
1270 tc_dump_tfilter, 0); 1635 tc_dump_tfilter, 0);
1271 1636
1272 return 0; 1637 return 0;
1638
1639err_register_pernet_subsys:
1640 destroy_workqueue(tc_filter_wq);
1641 return err;
1273} 1642}
1274 1643
1275subsys_initcall(tc_filter_init); 1644subsys_initcall(tc_filter_init);
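
The new tcf_net pernet subsystem is what backs tcf_block_lookup(): each
namespace carries an IDR mapping block index to struct tcf_block, set up
and torn down with the namespace, and tc_filter_init() now unwinds the
workqueue if pernet registration fails. The lookup itself reduces to
(sketch, names as above):

	struct tcf_net *tn = net_generic(net, tcf_net_id);
	struct tcf_block *block = idr_find(&tn->idr, block_index);

This registry is what lets userspace address filters by block rather than
by device, e.g. "tc filter add block 22 ..." with an iproute2 new enough
to support shared blocks.
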
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5f169ded347e..6b7ab3512f5b 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head)
112 tcf_queue_work(&f->work); 112 tcf_queue_work(&f->work);
113} 113}
114 114
115static void basic_destroy(struct tcf_proto *tp) 115static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
116{ 116{
117 struct basic_head *head = rtnl_dereference(tp->root); 117 struct basic_head *head = rtnl_dereference(tp->root);
118 struct basic_filter *f, *n; 118 struct basic_filter *f, *n;
@@ -120,7 +120,7 @@ static void basic_destroy(struct tcf_proto *tp)
120 list_for_each_entry_safe(f, n, &head->flist, link) { 120 list_for_each_entry_safe(f, n, &head->flist, link) {
121 list_del_rcu(&f->link); 121 list_del_rcu(&f->link);
122 tcf_unbind_filter(tp, &f->res); 122 tcf_unbind_filter(tp, &f->res);
123 idr_remove_ext(&head->handle_idr, f->handle); 123 idr_remove(&head->handle_idr, f->handle);
124 if (tcf_exts_get_net(&f->exts)) 124 if (tcf_exts_get_net(&f->exts))
125 call_rcu(&f->rcu, basic_delete_filter); 125 call_rcu(&f->rcu, basic_delete_filter);
126 else 126 else
@@ -130,14 +130,15 @@ static void basic_destroy(struct tcf_proto *tp)
130 kfree_rcu(head, rcu); 130 kfree_rcu(head, rcu);
131} 131}
132 132
133static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) 133static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
134 struct netlink_ext_ack *extack)
134{ 135{
135 struct basic_head *head = rtnl_dereference(tp->root); 136 struct basic_head *head = rtnl_dereference(tp->root);
136 struct basic_filter *f = arg; 137 struct basic_filter *f = arg;
137 138
138 list_del_rcu(&f->link); 139 list_del_rcu(&f->link);
139 tcf_unbind_filter(tp, &f->res); 140 tcf_unbind_filter(tp, &f->res);
140 idr_remove_ext(&head->handle_idr, f->handle); 141 idr_remove(&head->handle_idr, f->handle);
141 tcf_exts_get_net(&f->exts); 142 tcf_exts_get_net(&f->exts);
142 call_rcu(&f->rcu, basic_delete_filter); 143 call_rcu(&f->rcu, basic_delete_filter);
143 *last = list_empty(&head->flist); 144 *last = list_empty(&head->flist);
@@ -152,11 +153,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
152static int basic_set_parms(struct net *net, struct tcf_proto *tp, 153static int basic_set_parms(struct net *net, struct tcf_proto *tp,
153 struct basic_filter *f, unsigned long base, 154 struct basic_filter *f, unsigned long base,
154 struct nlattr **tb, 155 struct nlattr **tb,
155 struct nlattr *est, bool ovr) 156 struct nlattr *est, bool ovr,
157 struct netlink_ext_ack *extack)
156{ 158{
157 int err; 159 int err;
158 160
159 err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); 161 err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
160 if (err < 0) 162 if (err < 0)
161 return err; 163 return err;
162 164
@@ -175,14 +177,14 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
175 177
176static int basic_change(struct net *net, struct sk_buff *in_skb, 178static int basic_change(struct net *net, struct sk_buff *in_skb,
177 struct tcf_proto *tp, unsigned long base, u32 handle, 179 struct tcf_proto *tp, unsigned long base, u32 handle,
178 struct nlattr **tca, void **arg, bool ovr) 180 struct nlattr **tca, void **arg, bool ovr,
181 struct netlink_ext_ack *extack)
179{ 182{
180 int err; 183 int err;
181 struct basic_head *head = rtnl_dereference(tp->root); 184 struct basic_head *head = rtnl_dereference(tp->root);
182 struct nlattr *tb[TCA_BASIC_MAX + 1]; 185 struct nlattr *tb[TCA_BASIC_MAX + 1];
183 struct basic_filter *fold = (struct basic_filter *) *arg; 186 struct basic_filter *fold = (struct basic_filter *) *arg;
184 struct basic_filter *fnew; 187 struct basic_filter *fnew;
185 unsigned long idr_index;
186 188
187 if (tca[TCA_OPTIONS] == NULL) 189 if (tca[TCA_OPTIONS] == NULL)
188 return -EINVAL; 190 return -EINVAL;
@@ -205,33 +207,30 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
205 if (err < 0) 207 if (err < 0)
206 goto errout; 208 goto errout;
207 209
208 if (handle) { 210 if (!handle) {
209 fnew->handle = handle; 211 handle = 1;
210 if (!fold) { 212 err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
211 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, 213 INT_MAX, GFP_KERNEL);
212 handle, handle + 1, GFP_KERNEL); 214 } else if (!fold) {
213 if (err) 215 err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
214 goto errout; 216 handle, GFP_KERNEL);
215 }
216 } else {
217 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
218 1, 0x7FFFFFFF, GFP_KERNEL);
219 if (err)
220 goto errout;
221 fnew->handle = idr_index;
222 } 217 }
218 if (err)
219 goto errout;
220 fnew->handle = handle;
223 221
224 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); 222 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
223 extack);
225 if (err < 0) { 224 if (err < 0) {
226 if (!fold) 225 if (!fold)
227 idr_remove_ext(&head->handle_idr, fnew->handle); 226 idr_remove(&head->handle_idr, fnew->handle);
228 goto errout; 227 goto errout;
229 } 228 }
230 229
231 *arg = fnew; 230 *arg = fnew;
232 231
233 if (fold) { 232 if (fold) {
234 idr_replace_ext(&head->handle_idr, fnew, fnew->handle); 233 idr_replace(&head->handle_idr, fnew, fnew->handle);
235 list_replace_rcu(&fold->link, &fnew->link); 234 list_replace_rcu(&fold->link, &fnew->link);
236 tcf_unbind_filter(tp, &fold->res); 235 tcf_unbind_filter(tp, &fold->res);
237 tcf_exts_get_net(&fold->exts); 236 tcf_exts_get_net(&fold->exts);
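
The handle allocation above is the first of several conversions in this pull from idr_alloc_ext() to the new idr_alloc_u32(): the ID now travels in a u32 passed by reference (in: starting point, out: result) and the max bound is inclusive rather than exclusive. With no user handle, allocation starts at 1 and may take any free slot up to INT_MAX; with a user handle on a new filter, start and max are both that handle, so the call claims exactly that ID or fails with -ENOSPC. A minimal sketch of the pattern, with illustrative names (handle_get, filter):

#include <linux/idr.h>

/* Allocate-or-claim a 32-bit handle the way basic_change() now does. */
static int handle_get(struct idr *handle_idr, void *filter, u32 *handle)
{
	if (!*handle) {
		*handle = 1;	/* handle 0 is never handed out */
		return idr_alloc_u32(handle_idr, filter, handle,
				     INT_MAX, GFP_KERNEL);
	}
	/* claim exactly the requested ID: start == max == *handle */
	return idr_alloc_u32(handle_idr, filter, handle, *handle,
			     GFP_KERNEL);
}
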
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a62586e2dbdb..b07c1fa8bc0d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -147,7 +147,8 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
147} 147}
148 148
149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
150 struct cls_bpf_prog *oldprog) 150 struct cls_bpf_prog *oldprog,
151 struct netlink_ext_ack *extack)
151{ 152{
152 struct tcf_block *block = tp->chain->block; 153 struct tcf_block *block = tp->chain->block;
153 struct tc_cls_bpf_offload cls_bpf = {}; 154 struct tc_cls_bpf_offload cls_bpf = {};
@@ -158,22 +159,25 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
158 skip_sw = prog && tc_skip_sw(prog->gen_flags); 159 skip_sw = prog && tc_skip_sw(prog->gen_flags);
159 obj = prog ?: oldprog; 160 obj = prog ?: oldprog;
160 161
161 tc_cls_common_offload_init(&cls_bpf.common, tp); 162 tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags,
163 extack);
162 cls_bpf.command = TC_CLSBPF_OFFLOAD; 164 cls_bpf.command = TC_CLSBPF_OFFLOAD;
163 cls_bpf.exts = &obj->exts; 165 cls_bpf.exts = &obj->exts;
164 cls_bpf.prog = prog ? prog->filter : NULL; 166 cls_bpf.prog = prog ? prog->filter : NULL;
165 cls_bpf.oldprog = oldprog ? oldprog->filter : NULL; 167 cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
166 cls_bpf.name = obj->bpf_name; 168 cls_bpf.name = obj->bpf_name;
167 cls_bpf.exts_integrated = obj->exts_integrated; 169 cls_bpf.exts_integrated = obj->exts_integrated;
168 cls_bpf.gen_flags = obj->gen_flags; 170
171 if (oldprog)
172 tcf_block_offload_dec(block, &oldprog->gen_flags);
169 173
170 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); 174 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
171 if (prog) { 175 if (prog) {
172 if (err < 0) { 176 if (err < 0) {
173 cls_bpf_offload_cmd(tp, oldprog, prog); 177 cls_bpf_offload_cmd(tp, oldprog, prog, extack);
174 return err; 178 return err;
175 } else if (err > 0) { 179 } else if (err > 0) {
176 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 180 tcf_block_offload_inc(block, &prog->gen_flags);
177 } 181 }
178 } 182 }
179 183
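
Note that TCA_CLS_FLAGS_IN_HW is no longer OR-ed into gen_flags by the classifier itself; the new tcf_block_offload_inc()/tcf_block_offload_dec() helpers flip the flag and keep a per-block count of offloaded filters (the dec for oldprog happens before the driver callback, so a replace is counted once). A sketch of the counted-flag idiom, under the assumption that the helpers look roughly like this; offload_block and the function names are illustrative, not the kernel's:

#include <linux/pkt_cls.h>
#include <linux/types.h>

struct offload_block {
	unsigned int offloadcnt;	/* filters currently in hardware */
};

/* The flag flips only on the filter's first inc / last dec, so the
 * per-block counter stays in step with it.
 */
static void offload_inc(struct offload_block *block, u32 *flags)
{
	if (*flags & TCA_CLS_FLAGS_IN_HW)
		return;
	*flags |= TCA_CLS_FLAGS_IN_HW;
	block->offloadcnt++;
}

static void offload_dec(struct offload_block *block, u32 *flags)
{
	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
		return;
	*flags &= ~TCA_CLS_FLAGS_IN_HW;
	block->offloadcnt--;
}
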
@@ -189,7 +193,8 @@ static u32 cls_bpf_flags(u32 flags)
189} 193}
190 194
191static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 195static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
192 struct cls_bpf_prog *oldprog) 196 struct cls_bpf_prog *oldprog,
197 struct netlink_ext_ack *extack)
193{ 198{
194 if (prog && oldprog && 199 if (prog && oldprog &&
195 cls_bpf_flags(prog->gen_flags) != 200 cls_bpf_flags(prog->gen_flags) !=
@@ -203,15 +208,16 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
203 if (!prog && !oldprog) 208 if (!prog && !oldprog)
204 return 0; 209 return 0;
205 210
206 return cls_bpf_offload_cmd(tp, prog, oldprog); 211 return cls_bpf_offload_cmd(tp, prog, oldprog, extack);
207} 212}
208 213
209static void cls_bpf_stop_offload(struct tcf_proto *tp, 214static void cls_bpf_stop_offload(struct tcf_proto *tp,
210 struct cls_bpf_prog *prog) 215 struct cls_bpf_prog *prog,
216 struct netlink_ext_ack *extack)
211{ 217{
212 int err; 218 int err;
213 219
214 err = cls_bpf_offload_cmd(tp, NULL, prog); 220 err = cls_bpf_offload_cmd(tp, NULL, prog, extack);
215 if (err) 221 if (err)
216 pr_err("Stopping hardware offload failed: %d\n", err); 222 pr_err("Stopping hardware offload failed: %d\n", err);
217} 223}
@@ -222,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
222 struct tcf_block *block = tp->chain->block; 228 struct tcf_block *block = tp->chain->block;
223 struct tc_cls_bpf_offload cls_bpf = {}; 229 struct tc_cls_bpf_offload cls_bpf = {};
224 230
225 tc_cls_common_offload_init(&cls_bpf.common, tp); 231 tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL);
226 cls_bpf.command = TC_CLSBPF_STATS; 232 cls_bpf.command = TC_CLSBPF_STATS;
227 cls_bpf.exts = &prog->exts; 233 cls_bpf.exts = &prog->exts;
228 cls_bpf.prog = prog->filter; 234 cls_bpf.prog = prog->filter;
229 cls_bpf.name = prog->bpf_name; 235 cls_bpf.name = prog->bpf_name;
230 cls_bpf.exts_integrated = prog->exts_integrated; 236 cls_bpf.exts_integrated = prog->exts_integrated;
231 cls_bpf.gen_flags = prog->gen_flags;
232 237
233 tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); 238 tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
234} 239}
@@ -285,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
285 tcf_queue_work(&prog->work); 290 tcf_queue_work(&prog->work);
286} 291}
287 292
288static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) 293static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
294 struct netlink_ext_ack *extack)
289{ 295{
290 struct cls_bpf_head *head = rtnl_dereference(tp->root); 296 struct cls_bpf_head *head = rtnl_dereference(tp->root);
291 297
292 idr_remove_ext(&head->handle_idr, prog->handle); 298 idr_remove(&head->handle_idr, prog->handle);
293 cls_bpf_stop_offload(tp, prog); 299 cls_bpf_stop_offload(tp, prog, extack);
294 list_del_rcu(&prog->link); 300 list_del_rcu(&prog->link);
295 tcf_unbind_filter(tp, &prog->res); 301 tcf_unbind_filter(tp, &prog->res);
296 if (tcf_exts_get_net(&prog->exts)) 302 if (tcf_exts_get_net(&prog->exts))
@@ -299,22 +305,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
299 __cls_bpf_delete_prog(prog); 305 __cls_bpf_delete_prog(prog);
300} 306}
301 307
302static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) 308static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
309 struct netlink_ext_ack *extack)
303{ 310{
304 struct cls_bpf_head *head = rtnl_dereference(tp->root); 311 struct cls_bpf_head *head = rtnl_dereference(tp->root);
305 312
306 __cls_bpf_delete(tp, arg); 313 __cls_bpf_delete(tp, arg, extack);
307 *last = list_empty(&head->plist); 314 *last = list_empty(&head->plist);
308 return 0; 315 return 0;
309} 316}
310 317
311static void cls_bpf_destroy(struct tcf_proto *tp) 318static void cls_bpf_destroy(struct tcf_proto *tp,
319 struct netlink_ext_ack *extack)
312{ 320{
313 struct cls_bpf_head *head = rtnl_dereference(tp->root); 321 struct cls_bpf_head *head = rtnl_dereference(tp->root);
314 struct cls_bpf_prog *prog, *tmp; 322 struct cls_bpf_prog *prog, *tmp;
315 323
316 list_for_each_entry_safe(prog, tmp, &head->plist, link) 324 list_for_each_entry_safe(prog, tmp, &head->plist, link)
317 __cls_bpf_delete(tp, prog); 325 __cls_bpf_delete(tp, prog, extack);
318 326
319 idr_destroy(&head->handle_idr); 327 idr_destroy(&head->handle_idr);
320 kfree_rcu(head, rcu); 328 kfree_rcu(head, rcu);
@@ -399,15 +407,16 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
399 prog->bpf_name = name; 407 prog->bpf_name = name;
400 prog->filter = fp; 408 prog->filter = fp;
401 409
402 if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS)) 410 if (fp->dst_needed)
403 netif_keep_dst(qdisc_dev(tp->q)); 411 tcf_block_netif_keep_dst(tp->chain->block);
404 412
405 return 0; 413 return 0;
406} 414}
407 415
408static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, 416static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
409 struct cls_bpf_prog *prog, unsigned long base, 417 struct cls_bpf_prog *prog, unsigned long base,
410 struct nlattr **tb, struct nlattr *est, bool ovr) 418 struct nlattr **tb, struct nlattr *est, bool ovr,
419 struct netlink_ext_ack *extack)
411{ 420{
412 bool is_bpf, is_ebpf, have_exts = false; 421 bool is_bpf, is_ebpf, have_exts = false;
413 u32 gen_flags = 0; 422 u32 gen_flags = 0;
@@ -418,7 +427,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
418 if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) 427 if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
419 return -EINVAL; 428 return -EINVAL;
420 429
421 ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr); 430 ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
422 if (ret < 0) 431 if (ret < 0)
423 return ret; 432 return ret;
424 433
@@ -456,13 +465,12 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
456static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, 465static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
457 struct tcf_proto *tp, unsigned long base, 466 struct tcf_proto *tp, unsigned long base,
458 u32 handle, struct nlattr **tca, 467 u32 handle, struct nlattr **tca,
459 void **arg, bool ovr) 468 void **arg, bool ovr, struct netlink_ext_ack *extack)
460{ 469{
461 struct cls_bpf_head *head = rtnl_dereference(tp->root); 470 struct cls_bpf_head *head = rtnl_dereference(tp->root);
462 struct cls_bpf_prog *oldprog = *arg; 471 struct cls_bpf_prog *oldprog = *arg;
463 struct nlattr *tb[TCA_BPF_MAX + 1]; 472 struct nlattr *tb[TCA_BPF_MAX + 1];
464 struct cls_bpf_prog *prog; 473 struct cls_bpf_prog *prog;
465 unsigned long idr_index;
466 int ret; 474 int ret;
467 475
468 if (tca[TCA_OPTIONS] == NULL) 476 if (tca[TCA_OPTIONS] == NULL)
@@ -489,26 +497,24 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
489 } 497 }
490 498
491 if (handle == 0) { 499 if (handle == 0) {
492 ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, 500 handle = 1;
493 1, 0x7FFFFFFF, GFP_KERNEL); 501 ret = idr_alloc_u32(&head->handle_idr, prog, &handle,
494 if (ret) 502 INT_MAX, GFP_KERNEL);
495 goto errout; 503 } else if (!oldprog) {
496 prog->handle = idr_index; 504 ret = idr_alloc_u32(&head->handle_idr, prog, &handle,
497 } else { 505 handle, GFP_KERNEL);
498 if (!oldprog) {
499 ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
500 handle, handle + 1, GFP_KERNEL);
501 if (ret)
502 goto errout;
503 }
504 prog->handle = handle;
505 } 506 }
506 507
507 ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); 508 if (ret)
509 goto errout;
510 prog->handle = handle;
511
512 ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
513 extack);
508 if (ret < 0) 514 if (ret < 0)
509 goto errout_idr; 515 goto errout_idr;
510 516
511 ret = cls_bpf_offload(tp, prog, oldprog); 517 ret = cls_bpf_offload(tp, prog, oldprog, extack);
512 if (ret) 518 if (ret)
513 goto errout_parms; 519 goto errout_parms;
514 520
@@ -516,7 +522,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
516 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; 522 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
517 523
518 if (oldprog) { 524 if (oldprog) {
519 idr_replace_ext(&head->handle_idr, prog, handle); 525 idr_replace(&head->handle_idr, prog, handle);
520 list_replace_rcu(&oldprog->link, &prog->link); 526 list_replace_rcu(&oldprog->link, &prog->link);
521 tcf_unbind_filter(tp, &oldprog->res); 527 tcf_unbind_filter(tp, &oldprog->res);
522 tcf_exts_get_net(&oldprog->exts); 528 tcf_exts_get_net(&oldprog->exts);
@@ -532,7 +538,7 @@ errout_parms:
532 cls_bpf_free_parms(prog); 538 cls_bpf_free_parms(prog);
533errout_idr: 539errout_idr:
534 if (!oldprog) 540 if (!oldprog)
535 idr_remove_ext(&head->handle_idr, prog->handle); 541 idr_remove(&head->handle_idr, prog->handle);
536errout: 542errout:
537 tcf_exts_destroy(&prog->exts); 543 tcf_exts_destroy(&prog->exts);
538 kfree(prog); 544 kfree(prog);
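
The thread running through this whole series: every tcf_proto_ops callback that can fail a netlink request (change, delete, destroy, the *_set_parms helpers, tcf_exts_validate) now carries the request's struct netlink_ext_ack, so a failure can return a human-readable reason rather than a bare errno. A minimal sketch of the intended use inside a classifier; the function name and message text are illustrative:

#include <linux/errno.h>
#include <linux/netlink.h>

/* Report a specific validation failure through extack instead of
 * leaving userspace to guess what -EINVAL meant.
 */
static int example_validate(struct nlattr *opts,
			    struct netlink_ext_ack *extack)
{
	if (!opts) {
		NL_SET_ERR_MSG(extack, "Classifier options are required");
		return -EINVAL;
	}
	return 0;
}
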
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 309d5899265f..762da5c0cf5e 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -91,7 +91,8 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root)
91static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, 91static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
92 struct tcf_proto *tp, unsigned long base, 92 struct tcf_proto *tp, unsigned long base,
93 u32 handle, struct nlattr **tca, 93 u32 handle, struct nlattr **tca,
94 void **arg, bool ovr) 94 void **arg, bool ovr,
95 struct netlink_ext_ack *extack)
95{ 96{
96 struct nlattr *tb[TCA_CGROUP_MAX + 1]; 97 struct nlattr *tb[TCA_CGROUP_MAX + 1];
97 struct cls_cgroup_head *head = rtnl_dereference(tp->root); 98 struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -121,7 +122,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
121 if (err < 0) 122 if (err < 0)
122 goto errout; 123 goto errout;
123 124
124 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr); 125 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
126 extack);
125 if (err < 0) 127 if (err < 0)
126 goto errout; 128 goto errout;
127 129
@@ -141,7 +143,8 @@ errout:
141 return err; 143 return err;
142} 144}
143 145
144static void cls_cgroup_destroy(struct tcf_proto *tp) 146static void cls_cgroup_destroy(struct tcf_proto *tp,
147 struct netlink_ext_ack *extack)
145{ 148{
146 struct cls_cgroup_head *head = rtnl_dereference(tp->root); 149 struct cls_cgroup_head *head = rtnl_dereference(tp->root);
147 150
@@ -154,7 +157,8 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
154 } 157 }
155} 158}
156 159
157static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) 160static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
161 struct netlink_ext_ack *extack)
158{ 162{
159 return -EOPNOTSUPP; 163 return -EOPNOTSUPP;
160} 164}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 25c2a888e1f0..cd5fe383afdd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -401,7 +401,7 @@ static void flow_destroy_filter(struct rcu_head *head)
401static int flow_change(struct net *net, struct sk_buff *in_skb, 401static int flow_change(struct net *net, struct sk_buff *in_skb,
402 struct tcf_proto *tp, unsigned long base, 402 struct tcf_proto *tp, unsigned long base,
403 u32 handle, struct nlattr **tca, 403 u32 handle, struct nlattr **tca,
404 void **arg, bool ovr) 404 void **arg, bool ovr, struct netlink_ext_ack *extack)
405{ 405{
406 struct flow_head *head = rtnl_dereference(tp->root); 406 struct flow_head *head = rtnl_dereference(tp->root);
407 struct flow_filter *fold, *fnew; 407 struct flow_filter *fold, *fnew;
@@ -454,7 +454,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
454 if (err < 0) 454 if (err < 0)
455 goto err2; 455 goto err2;
456 456
457 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr); 457 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
458 extack);
458 if (err < 0) 459 if (err < 0)
459 goto err2; 460 goto err2;
460 461
@@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
526 527
527 timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); 528 timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
528 529
529 netif_keep_dst(qdisc_dev(tp->q)); 530 tcf_block_netif_keep_dst(tp->chain->block);
530 531
531 if (tb[TCA_FLOW_KEYS]) { 532 if (tb[TCA_FLOW_KEYS]) {
532 fnew->keymask = keymask; 533 fnew->keymask = keymask;
@@ -574,7 +575,8 @@ err1:
574 return err; 575 return err;
575} 576}
576 577
577static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) 578static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
579 struct netlink_ext_ack *extack)
578{ 580{
579 struct flow_head *head = rtnl_dereference(tp->root); 581 struct flow_head *head = rtnl_dereference(tp->root);
580 struct flow_filter *f = arg; 582 struct flow_filter *f = arg;
@@ -598,7 +600,7 @@ static int flow_init(struct tcf_proto *tp)
598 return 0; 600 return 0;
599} 601}
600 602
601static void flow_destroy(struct tcf_proto *tp) 603static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
602{ 604{
603 struct flow_head *head = rtnl_dereference(tp->root); 605 struct flow_head *head = rtnl_dereference(tp->root);
604 struct flow_filter *f, *next; 606 struct flow_filter *f, *next;
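
flow (like route and bpf elsewhere in this diff) used to call netif_keep_dst(qdisc_dev(tp->q)) directly, which presumes one qdisc and therefore one device per classifier. Shared blocks break that assumption, so the request is now routed through tcf_block_netif_keep_dst(), which records it on the block and fans it out to every device the block is bound to. A sketch of that fan-out with illustrative types (shared_block, bound_qdisc); the kernel's own structures differ:

#include <linux/list.h>
#include <linux/netdevice.h>
#include <net/sch_generic.h>

struct bound_qdisc {
	struct list_head list;
	struct Qdisc *q;
};

struct shared_block {
	bool keep_dst;			/* remembered for future binds */
	struct list_head bound_qdiscs;	/* of struct bound_qdisc */
};

static void block_netif_keep_dst(struct shared_block *block)
{
	struct bound_qdisc *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->bound_qdiscs, list)
		netif_keep_dst(qdisc_dev(item->q));
}
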
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 543a3e875d05..7d0ce2c40f93 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -166,6 +166,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
166 * so do it rather here. 166 * so do it rather here.
167 */ 167 */
168 skb_key.basic.n_proto = skb->protocol; 168 skb_key.basic.n_proto = skb->protocol;
169 skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key);
169 skb_flow_dissect(skb, &head->dissector, &skb_key, 0); 170 skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
170 171
171 fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); 172 fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
@@ -217,30 +218,33 @@ static void fl_destroy_filter(struct rcu_head *head)
217 tcf_queue_work(&f->work); 218 tcf_queue_work(&f->work);
218} 219}
219 220
220static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) 221static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
222 struct netlink_ext_ack *extack)
221{ 223{
222 struct tc_cls_flower_offload cls_flower = {}; 224 struct tc_cls_flower_offload cls_flower = {};
223 struct tcf_block *block = tp->chain->block; 225 struct tcf_block *block = tp->chain->block;
224 226
225 tc_cls_common_offload_init(&cls_flower.common, tp); 227 tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
226 cls_flower.command = TC_CLSFLOWER_DESTROY; 228 cls_flower.command = TC_CLSFLOWER_DESTROY;
227 cls_flower.cookie = (unsigned long) f; 229 cls_flower.cookie = (unsigned long) f;
228 230
229 tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, 231 tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
230 &cls_flower, false); 232 &cls_flower, false);
233 tcf_block_offload_dec(block, &f->flags);
231} 234}
232 235
233static int fl_hw_replace_filter(struct tcf_proto *tp, 236static int fl_hw_replace_filter(struct tcf_proto *tp,
234 struct flow_dissector *dissector, 237 struct flow_dissector *dissector,
235 struct fl_flow_key *mask, 238 struct fl_flow_key *mask,
236 struct cls_fl_filter *f) 239 struct cls_fl_filter *f,
240 struct netlink_ext_ack *extack)
237{ 241{
238 struct tc_cls_flower_offload cls_flower = {}; 242 struct tc_cls_flower_offload cls_flower = {};
239 struct tcf_block *block = tp->chain->block; 243 struct tcf_block *block = tp->chain->block;
240 bool skip_sw = tc_skip_sw(f->flags); 244 bool skip_sw = tc_skip_sw(f->flags);
241 int err; 245 int err;
242 246
243 tc_cls_common_offload_init(&cls_flower.common, tp); 247 tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
244 cls_flower.command = TC_CLSFLOWER_REPLACE; 248 cls_flower.command = TC_CLSFLOWER_REPLACE;
245 cls_flower.cookie = (unsigned long) f; 249 cls_flower.cookie = (unsigned long) f;
246 cls_flower.dissector = dissector; 250 cls_flower.dissector = dissector;
@@ -252,10 +256,10 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
252 err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, 256 err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
253 &cls_flower, skip_sw); 257 &cls_flower, skip_sw);
254 if (err < 0) { 258 if (err < 0) {
255 fl_hw_destroy_filter(tp, f); 259 fl_hw_destroy_filter(tp, f, NULL);
256 return err; 260 return err;
257 } else if (err > 0) { 261 } else if (err > 0) {
258 f->flags |= TCA_CLS_FLAGS_IN_HW; 262 tcf_block_offload_inc(block, &f->flags);
259 } 263 }
260 264
261 if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) 265 if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
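
tc_cls_common_offload_init() now takes the filter's flags and the extack. The rollback destroy in the error path above passes NULL deliberately: the interesting error is the one from the failed replace, and the cleanup call should not generate a second message. The helper is presumably shaped along these lines (a paraphrase of include/net/pkt_cls.h, not a verbatim copy):

#include <net/pkt_cls.h>

/* Only skip_sw requests hand extack to the driver: for those the
 * hardware verdict is the final answer to the user.
 */
static inline void
offload_common_init(struct tc_cls_common_offload *common,
		    const struct tcf_proto *tp, u32 flags,
		    struct netlink_ext_ack *extack)
{
	common->chain_index = tp->chain->index;
	common->protocol = tp->protocol;
	common->prio = tp->prio;
	if (tc_skip_sw(flags))
		common->extack = extack;
}
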
@@ -269,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
269 struct tc_cls_flower_offload cls_flower = {}; 273 struct tc_cls_flower_offload cls_flower = {};
270 struct tcf_block *block = tp->chain->block; 274 struct tcf_block *block = tp->chain->block;
271 275
272 tc_cls_common_offload_init(&cls_flower.common, tp); 276 tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
273 cls_flower.command = TC_CLSFLOWER_STATS; 277 cls_flower.command = TC_CLSFLOWER_STATS;
274 cls_flower.cookie = (unsigned long) f; 278 cls_flower.cookie = (unsigned long) f;
275 cls_flower.exts = &f->exts; 279 cls_flower.exts = &f->exts;
@@ -279,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
279 &cls_flower, false); 283 &cls_flower, false);
280} 284}
281 285
282static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) 286static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
287 struct netlink_ext_ack *extack)
283{ 288{
284 struct cls_fl_head *head = rtnl_dereference(tp->root); 289 struct cls_fl_head *head = rtnl_dereference(tp->root);
285 290
286 idr_remove_ext(&head->handle_idr, f->handle); 291 idr_remove(&head->handle_idr, f->handle);
287 list_del_rcu(&f->list); 292 list_del_rcu(&f->list);
288 if (!tc_skip_hw(f->flags)) 293 if (!tc_skip_hw(f->flags))
289 fl_hw_destroy_filter(tp, f); 294 fl_hw_destroy_filter(tp, f, extack);
290 tcf_unbind_filter(tp, &f->res); 295 tcf_unbind_filter(tp, &f->res);
291 if (tcf_exts_get_net(&f->exts)) 296 if (tcf_exts_get_net(&f->exts))
292 call_rcu(&f->rcu, fl_destroy_filter); 297 call_rcu(&f->rcu, fl_destroy_filter);
@@ -312,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
312 schedule_work(&head->work); 317 schedule_work(&head->work);
313} 318}
314 319
315static void fl_destroy(struct tcf_proto *tp) 320static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
316{ 321{
317 struct cls_fl_head *head = rtnl_dereference(tp->root); 322 struct cls_fl_head *head = rtnl_dereference(tp->root);
318 struct cls_fl_filter *f, *next; 323 struct cls_fl_filter *f, *next;
319 324
320 list_for_each_entry_safe(f, next, &head->filters, list) 325 list_for_each_entry_safe(f, next, &head->filters, list)
321 __fl_delete(tp, f); 326 __fl_delete(tp, f, extack);
322 idr_destroy(&head->handle_idr); 327 idr_destroy(&head->handle_idr);
323 328
324 __module_get(THIS_MODULE); 329 __module_get(THIS_MODULE);
@@ -329,7 +334,7 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
329{ 334{
330 struct cls_fl_head *head = rtnl_dereference(tp->root); 335 struct cls_fl_head *head = rtnl_dereference(tp->root);
331 336
332 return idr_find_ext(&head->handle_idr, handle); 337 return idr_find(&head->handle_idr, handle);
333} 338}
334 339
335static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { 340static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -524,13 +529,14 @@ static void fl_set_key_ip(struct nlattr **tb,
524} 529}
525 530
526static int fl_set_key(struct net *net, struct nlattr **tb, 531static int fl_set_key(struct net *net, struct nlattr **tb,
527 struct fl_flow_key *key, struct fl_flow_key *mask) 532 struct fl_flow_key *key, struct fl_flow_key *mask,
533 struct netlink_ext_ack *extack)
528{ 534{
529 __be16 ethertype; 535 __be16 ethertype;
530 int ret = 0; 536 int ret = 0;
531#ifdef CONFIG_NET_CLS_IND 537#ifdef CONFIG_NET_CLS_IND
532 if (tb[TCA_FLOWER_INDEV]) { 538 if (tb[TCA_FLOWER_INDEV]) {
533 int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); 539 int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
534 if (err < 0) 540 if (err < 0)
535 return err; 541 return err;
536 key->indev_ifindex = err; 542 key->indev_ifindex = err;
@@ -825,11 +831,12 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
825static int fl_set_parms(struct net *net, struct tcf_proto *tp, 831static int fl_set_parms(struct net *net, struct tcf_proto *tp,
826 struct cls_fl_filter *f, struct fl_flow_mask *mask, 832 struct cls_fl_filter *f, struct fl_flow_mask *mask,
827 unsigned long base, struct nlattr **tb, 833 unsigned long base, struct nlattr **tb,
828 struct nlattr *est, bool ovr) 834 struct nlattr *est, bool ovr,
835 struct netlink_ext_ack *extack)
829{ 836{
830 int err; 837 int err;
831 838
832 err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); 839 err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
833 if (err < 0) 840 if (err < 0)
834 return err; 841 return err;
835 842
@@ -838,7 +845,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
838 tcf_bind_filter(tp, &f->res, base); 845 tcf_bind_filter(tp, &f->res, base);
839 } 846 }
840 847
841 err = fl_set_key(net, tb, &f->key, &mask->key); 848 err = fl_set_key(net, tb, &f->key, &mask->key, extack);
842 if (err) 849 if (err)
843 return err; 850 return err;
844 851
@@ -851,14 +858,13 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
851static int fl_change(struct net *net, struct sk_buff *in_skb, 858static int fl_change(struct net *net, struct sk_buff *in_skb,
852 struct tcf_proto *tp, unsigned long base, 859 struct tcf_proto *tp, unsigned long base,
853 u32 handle, struct nlattr **tca, 860 u32 handle, struct nlattr **tca,
854 void **arg, bool ovr) 861 void **arg, bool ovr, struct netlink_ext_ack *extack)
855{ 862{
856 struct cls_fl_head *head = rtnl_dereference(tp->root); 863 struct cls_fl_head *head = rtnl_dereference(tp->root);
857 struct cls_fl_filter *fold = *arg; 864 struct cls_fl_filter *fold = *arg;
858 struct cls_fl_filter *fnew; 865 struct cls_fl_filter *fnew;
859 struct nlattr **tb; 866 struct nlattr **tb;
860 struct fl_flow_mask mask = {}; 867 struct fl_flow_mask mask = {};
861 unsigned long idr_index;
862 int err; 868 int err;
863 869
864 if (!tca[TCA_OPTIONS]) 870 if (!tca[TCA_OPTIONS])
@@ -889,21 +895,17 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
889 goto errout; 895 goto errout;
890 896
891 if (!handle) { 897 if (!handle) {
892 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, 898 handle = 1;
893 1, 0x80000000, GFP_KERNEL); 899 err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
894 if (err) 900 INT_MAX, GFP_KERNEL);
895 goto errout; 901 } else if (!fold) {
896 fnew->handle = idr_index; 902 /* user specifies a handle and it doesn't exist */
897 } 903 err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
898 904 handle, GFP_KERNEL);
899 /* user specifies a handle and it doesn't exist */
900 if (handle && !fold) {
901 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
902 handle, handle + 1, GFP_KERNEL);
903 if (err)
904 goto errout;
905 fnew->handle = idr_index;
906 } 905 }
906 if (err)
907 goto errout;
908 fnew->handle = handle;
907 909
908 if (tb[TCA_FLOWER_FLAGS]) { 910 if (tb[TCA_FLOWER_FLAGS]) {
909 fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); 911 fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
@@ -914,7 +916,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
914 } 916 }
915 } 917 }
916 918
917 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); 919 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
920 extack);
918 if (err) 921 if (err)
919 goto errout_idr; 922 goto errout_idr;
920 923
@@ -938,7 +941,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
938 err = fl_hw_replace_filter(tp, 941 err = fl_hw_replace_filter(tp,
939 &head->dissector, 942 &head->dissector,
940 &mask.key, 943 &mask.key,
941 fnew); 944 fnew,
945 extack);
942 if (err) 946 if (err)
943 goto errout_idr; 947 goto errout_idr;
944 } 948 }
@@ -951,14 +955,13 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
951 rhashtable_remove_fast(&head->ht, &fold->ht_node, 955 rhashtable_remove_fast(&head->ht, &fold->ht_node,
952 head->ht_params); 956 head->ht_params);
953 if (!tc_skip_hw(fold->flags)) 957 if (!tc_skip_hw(fold->flags))
954 fl_hw_destroy_filter(tp, fold); 958 fl_hw_destroy_filter(tp, fold, NULL);
955 } 959 }
956 960
957 *arg = fnew; 961 *arg = fnew;
958 962
959 if (fold) { 963 if (fold) {
960 fnew->handle = handle; 964 idr_replace(&head->handle_idr, fnew, fnew->handle);
961 idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
962 list_replace_rcu(&fold->list, &fnew->list); 965 list_replace_rcu(&fold->list, &fnew->list);
963 tcf_unbind_filter(tp, &fold->res); 966 tcf_unbind_filter(tp, &fold->res);
964 tcf_exts_get_net(&fold->exts); 967 tcf_exts_get_net(&fold->exts);
@@ -972,7 +975,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
972 975
973errout_idr: 976errout_idr:
974 if (fnew->handle) 977 if (fnew->handle)
975 idr_remove_ext(&head->handle_idr, fnew->handle); 978 idr_remove(&head->handle_idr, fnew->handle);
976errout: 979errout:
977 tcf_exts_destroy(&fnew->exts); 980 tcf_exts_destroy(&fnew->exts);
978 kfree(fnew); 981 kfree(fnew);
@@ -981,7 +984,8 @@ errout_tb:
981 return err; 984 return err;
982} 985}
983 986
984static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) 987static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
988 struct netlink_ext_ack *extack)
985{ 989{
986 struct cls_fl_head *head = rtnl_dereference(tp->root); 990 struct cls_fl_head *head = rtnl_dereference(tp->root);
987 struct cls_fl_filter *f = arg; 991 struct cls_fl_filter *f = arg;
@@ -989,7 +993,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
989 if (!tc_skip_sw(f->flags)) 993 if (!tc_skip_sw(f->flags))
990 rhashtable_remove_fast(&head->ht, &f->ht_node, 994 rhashtable_remove_fast(&head->ht, &f->ht_node,
991 head->ht_params); 995 head->ht_params);
992 __fl_delete(tp, f); 996 __fl_delete(tp, f, extack);
993 *last = list_empty(&head->filters); 997 *last = list_empty(&head->filters);
994 return 0; 998 return 0;
995} 999}
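
Two details of the flower conversion worth noting. First, the bounds look different (exclusive 0x80000000 before, inclusive INT_MAX now) but cover the same 1..0x7FFFFFFF handle range. Second, on the replace path fnew inherits fold's handle (neither allocation branch runs when fold exists) and idr_replace() swaps the stored pointer without ever freeing the ID, so a lookup racing with the replace sees either the old or the new filter, never a hole. The invariant in miniature, with illustrative names:

#include <linux/idr.h>

/* Swap the object stored under an existing handle.  The ID itself is
 * never released, so it cannot be grabbed by a concurrent allocation
 * between "remove old" and "insert new".
 */
static void *filter_swap(struct idr *handle_idr, void *new_filter,
			 u32 handle)
{
	return idr_replace(handle_idr, new_filter, handle);
}
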
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 20f0de1a960a..8b207723fbc2 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head)
149 tcf_queue_work(&f->work); 149 tcf_queue_work(&f->work);
150} 150}
151 151
152static void fw_destroy(struct tcf_proto *tp) 152static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
153{ 153{
154 struct fw_head *head = rtnl_dereference(tp->root); 154 struct fw_head *head = rtnl_dereference(tp->root);
155 struct fw_filter *f; 155 struct fw_filter *f;
@@ -172,7 +172,8 @@ static void fw_destroy(struct tcf_proto *tp)
172 kfree_rcu(head, rcu); 172 kfree_rcu(head, rcu);
173} 173}
174 174
175static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) 175static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
176 struct netlink_ext_ack *extack)
176{ 177{
177 struct fw_head *head = rtnl_dereference(tp->root); 178 struct fw_head *head = rtnl_dereference(tp->root);
178 struct fw_filter *f = arg; 179 struct fw_filter *f = arg;
@@ -218,13 +219,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
218 219
219static int fw_set_parms(struct net *net, struct tcf_proto *tp, 220static int fw_set_parms(struct net *net, struct tcf_proto *tp,
220 struct fw_filter *f, struct nlattr **tb, 221 struct fw_filter *f, struct nlattr **tb,
221 struct nlattr **tca, unsigned long base, bool ovr) 222 struct nlattr **tca, unsigned long base, bool ovr,
223 struct netlink_ext_ack *extack)
222{ 224{
223 struct fw_head *head = rtnl_dereference(tp->root); 225 struct fw_head *head = rtnl_dereference(tp->root);
224 u32 mask; 226 u32 mask;
225 int err; 227 int err;
226 228
227 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr); 229 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
230 extack);
228 if (err < 0) 231 if (err < 0)
229 return err; 232 return err;
230 233
@@ -236,7 +239,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
236#ifdef CONFIG_NET_CLS_IND 239#ifdef CONFIG_NET_CLS_IND
237 if (tb[TCA_FW_INDEV]) { 240 if (tb[TCA_FW_INDEV]) {
238 int ret; 241 int ret;
239 ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); 242 ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
240 if (ret < 0) 243 if (ret < 0)
241 return ret; 244 return ret;
242 f->ifindex = ret; 245 f->ifindex = ret;
@@ -257,7 +260,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
257static int fw_change(struct net *net, struct sk_buff *in_skb, 260static int fw_change(struct net *net, struct sk_buff *in_skb,
258 struct tcf_proto *tp, unsigned long base, 261 struct tcf_proto *tp, unsigned long base,
259 u32 handle, struct nlattr **tca, void **arg, 262 u32 handle, struct nlattr **tca, void **arg,
260 bool ovr) 263 bool ovr, struct netlink_ext_ack *extack)
261{ 264{
262 struct fw_head *head = rtnl_dereference(tp->root); 265 struct fw_head *head = rtnl_dereference(tp->root);
263 struct fw_filter *f = *arg; 266 struct fw_filter *f = *arg;
@@ -296,7 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
296 return err; 299 return err;
297 } 300 }
298 301
299 err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr); 302 err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
300 if (err < 0) { 303 if (err < 0) {
301 tcf_exts_destroy(&fnew->exts); 304 tcf_exts_destroy(&fnew->exts);
302 kfree(fnew); 305 kfree(fnew);
@@ -345,7 +348,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
345 f->id = handle; 348 f->id = handle;
346 f->tp = tp; 349 f->tp = tp;
347 350
348 err = fw_set_parms(net, tp, f, tb, tca, base, ovr); 351 err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
349 if (err < 0) 352 if (err < 0)
350 goto errout; 353 goto errout;
351 354
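
tcf_change_indev() also grows an extack argument, used by fw above and flower earlier for TCA_*_INDEV parsing. A sketch of what such a resolver looks like once it can report through extack; the body here is illustrative, not the kernel's:

#include <linux/netdevice.h>
#include <net/netlink.h>

static int change_indev(struct net *net, struct nlattr *indev_attr,
			struct netlink_ext_ack *extack)
{
	char name[IFNAMSIZ];
	struct net_device *dev;

	if (nla_strlcpy(name, indev_attr, IFNAMSIZ) >= IFNAMSIZ) {
		NL_SET_ERR_MSG(extack, "Interface name too long");
		return -EINVAL;
	}
	dev = __dev_get_by_name(net, name);
	if (!dev) {
		NL_SET_ERR_MSG(extack, "Network device not found");
		return -ENODEV;
	}
	return dev->ifindex;	/* >= 0; callers treat < 0 as error */
}
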
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 66d4e0099158..2ba721a590a7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,28 +71,31 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
71 71
72static void mall_destroy_hw_filter(struct tcf_proto *tp, 72static void mall_destroy_hw_filter(struct tcf_proto *tp,
73 struct cls_mall_head *head, 73 struct cls_mall_head *head,
74 unsigned long cookie) 74 unsigned long cookie,
75 struct netlink_ext_ack *extack)
75{ 76{
76 struct tc_cls_matchall_offload cls_mall = {}; 77 struct tc_cls_matchall_offload cls_mall = {};
77 struct tcf_block *block = tp->chain->block; 78 struct tcf_block *block = tp->chain->block;
78 79
79 tc_cls_common_offload_init(&cls_mall.common, tp); 80 tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
80 cls_mall.command = TC_CLSMATCHALL_DESTROY; 81 cls_mall.command = TC_CLSMATCHALL_DESTROY;
81 cls_mall.cookie = cookie; 82 cls_mall.cookie = cookie;
82 83
83 tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); 84 tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
85 tcf_block_offload_dec(block, &head->flags);
84} 86}
85 87
86static int mall_replace_hw_filter(struct tcf_proto *tp, 88static int mall_replace_hw_filter(struct tcf_proto *tp,
87 struct cls_mall_head *head, 89 struct cls_mall_head *head,
88 unsigned long cookie) 90 unsigned long cookie,
91 struct netlink_ext_ack *extack)
89{ 92{
90 struct tc_cls_matchall_offload cls_mall = {}; 93 struct tc_cls_matchall_offload cls_mall = {};
91 struct tcf_block *block = tp->chain->block; 94 struct tcf_block *block = tp->chain->block;
92 bool skip_sw = tc_skip_sw(head->flags); 95 bool skip_sw = tc_skip_sw(head->flags);
93 int err; 96 int err;
94 97
95 tc_cls_common_offload_init(&cls_mall.common, tp); 98 tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
96 cls_mall.command = TC_CLSMATCHALL_REPLACE; 99 cls_mall.command = TC_CLSMATCHALL_REPLACE;
97 cls_mall.exts = &head->exts; 100 cls_mall.exts = &head->exts;
98 cls_mall.cookie = cookie; 101 cls_mall.cookie = cookie;
@@ -100,10 +103,10 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
100 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, 103 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
101 &cls_mall, skip_sw); 104 &cls_mall, skip_sw);
102 if (err < 0) { 105 if (err < 0) {
103 mall_destroy_hw_filter(tp, head, cookie); 106 mall_destroy_hw_filter(tp, head, cookie, NULL);
104 return err; 107 return err;
105 } else if (err > 0) { 108 } else if (err > 0) {
106 head->flags |= TCA_CLS_FLAGS_IN_HW; 109 tcf_block_offload_inc(block, &head->flags);
107 } 110 }
108 111
109 if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) 112 if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
@@ -112,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
112 return 0; 115 return 0;
113} 116}
114 117
115static void mall_destroy(struct tcf_proto *tp) 118static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
116{ 119{
117 struct cls_mall_head *head = rtnl_dereference(tp->root); 120 struct cls_mall_head *head = rtnl_dereference(tp->root);
118 121
@@ -120,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp)
120 return; 123 return;
121 124
122 if (!tc_skip_hw(head->flags)) 125 if (!tc_skip_hw(head->flags))
123 mall_destroy_hw_filter(tp, head, (unsigned long) head); 126 mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
124 127
125 if (tcf_exts_get_net(&head->exts)) 128 if (tcf_exts_get_net(&head->exts))
126 call_rcu(&head->rcu, mall_destroy_rcu); 129 call_rcu(&head->rcu, mall_destroy_rcu);
@@ -141,11 +144,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
141static int mall_set_parms(struct net *net, struct tcf_proto *tp, 144static int mall_set_parms(struct net *net, struct tcf_proto *tp,
142 struct cls_mall_head *head, 145 struct cls_mall_head *head,
143 unsigned long base, struct nlattr **tb, 146 unsigned long base, struct nlattr **tb,
144 struct nlattr *est, bool ovr) 147 struct nlattr *est, bool ovr,
148 struct netlink_ext_ack *extack)
145{ 149{
146 int err; 150 int err;
147 151
148 err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr); 152 err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
149 if (err < 0) 153 if (err < 0)
150 return err; 154 return err;
151 155
@@ -159,7 +163,7 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
159static int mall_change(struct net *net, struct sk_buff *in_skb, 163static int mall_change(struct net *net, struct sk_buff *in_skb,
160 struct tcf_proto *tp, unsigned long base, 164 struct tcf_proto *tp, unsigned long base,
161 u32 handle, struct nlattr **tca, 165 u32 handle, struct nlattr **tca,
162 void **arg, bool ovr) 166 void **arg, bool ovr, struct netlink_ext_ack *extack)
163{ 167{
164 struct cls_mall_head *head = rtnl_dereference(tp->root); 168 struct cls_mall_head *head = rtnl_dereference(tp->root);
165 struct nlattr *tb[TCA_MATCHALL_MAX + 1]; 169 struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -197,12 +201,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
197 new->handle = handle; 201 new->handle = handle;
198 new->flags = flags; 202 new->flags = flags;
199 203
200 err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr); 204 err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
205 extack);
201 if (err) 206 if (err)
202 goto err_set_parms; 207 goto err_set_parms;
203 208
204 if (!tc_skip_hw(new->flags)) { 209 if (!tc_skip_hw(new->flags)) {
205 err = mall_replace_hw_filter(tp, new, (unsigned long) new); 210 err = mall_replace_hw_filter(tp, new, (unsigned long)new,
211 extack);
206 if (err) 212 if (err)
207 goto err_replace_hw_filter; 213 goto err_replace_hw_filter;
208 } 214 }
@@ -222,7 +228,8 @@ err_exts_init:
222 return err; 228 return err;
223} 229}
224 230
225static int mall_delete(struct tcf_proto *tp, void *arg, bool *last) 231static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
232 struct netlink_ext_ack *extack)
226{ 233{
227 return -EOPNOTSUPP; 234 return -EOPNOTSUPP;
228} 235}
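
mall_delete(), like cls_cgroup_delete() earlier, refuses with -EOPNOTSUPP because these classifiers hold exactly one filter; the only way to remove it is to destroy the whole tcf_proto. The *last out-parameter that the other delete callbacks fill is what lets the core make that same decision for multi-filter classifiers. A condensed, illustrative sketch of the caller's side (the function name is made up; the ops signatures match this series):

#include <net/sch_generic.h>

static int delete_and_maybe_destroy(struct tcf_proto *tp, void *fh,
				    struct netlink_ext_ack *extack)
{
	bool last;
	int err;

	err = tp->ops->delete(tp, fh, &last, extack);
	if (err)
		return err;
	if (last)
		tp->ops->destroy(tp, extack);	/* no filters remain */
	return 0;
}
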
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index ac9a5b8825b9..21a03a8ee029 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head)
281 tcf_queue_work(&f->work); 281 tcf_queue_work(&f->work);
282} 282}
283 283
284static void route4_destroy(struct tcf_proto *tp) 284static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
285{ 285{
286 struct route4_head *head = rtnl_dereference(tp->root); 286 struct route4_head *head = rtnl_dereference(tp->root);
287 int h1, h2; 287 int h1, h2;
@@ -316,7 +316,8 @@ static void route4_destroy(struct tcf_proto *tp)
316 kfree_rcu(head, rcu); 316 kfree_rcu(head, rcu);
317} 317}
318 318
319static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) 319static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
320 struct netlink_ext_ack *extack)
320{ 321{
321 struct route4_head *head = rtnl_dereference(tp->root); 322 struct route4_head *head = rtnl_dereference(tp->root);
322 struct route4_filter *f = arg; 323 struct route4_filter *f = arg;
@@ -389,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
389 unsigned long base, struct route4_filter *f, 390 unsigned long base, struct route4_filter *f,
390 u32 handle, struct route4_head *head, 391 u32 handle, struct route4_head *head,
391 struct nlattr **tb, struct nlattr *est, int new, 392 struct nlattr **tb, struct nlattr *est, int new,
392 bool ovr) 393 bool ovr, struct netlink_ext_ack *extack)
393{ 394{
394 u32 id = 0, to = 0, nhandle = 0x8000; 395 u32 id = 0, to = 0, nhandle = 0x8000;
395 struct route4_filter *fp; 396 struct route4_filter *fp;
@@ -397,7 +398,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
397 struct route4_bucket *b; 398 struct route4_bucket *b;
398 int err; 399 int err;
399 400
400 err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); 401 err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
401 if (err < 0) 402 if (err < 0)
402 return err; 403 return err;
403 404
@@ -471,7 +472,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
471 472
472static int route4_change(struct net *net, struct sk_buff *in_skb, 473static int route4_change(struct net *net, struct sk_buff *in_skb,
473 struct tcf_proto *tp, unsigned long base, u32 handle, 474 struct tcf_proto *tp, unsigned long base, u32 handle,
474 struct nlattr **tca, void **arg, bool ovr) 475 struct nlattr **tca, void **arg, bool ovr,
476 struct netlink_ext_ack *extack)
475{ 477{
476 struct route4_head *head = rtnl_dereference(tp->root); 478 struct route4_head *head = rtnl_dereference(tp->root);
477 struct route4_filter __rcu **fp; 479 struct route4_filter __rcu **fp;
@@ -515,7 +517,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
515 } 517 }
516 518
517 err = route4_set_parms(net, tp, base, f, handle, head, tb, 519 err = route4_set_parms(net, tp, base, f, handle, head, tb,
518 tca[TCA_RATE], new, ovr); 520 tca[TCA_RATE], new, ovr, extack);
519 if (err < 0) 521 if (err < 0)
520 goto errout; 522 goto errout;
521 523
@@ -527,7 +529,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
527 if (f->handle < f1->handle) 529 if (f->handle < f1->handle)
528 break; 530 break;
529 531
530 netif_keep_dst(qdisc_dev(tp->q)); 532 tcf_block_netif_keep_dst(tp->chain->block);
531 rcu_assign_pointer(f->next, f1); 533 rcu_assign_pointer(f->next, f1);
532 rcu_assign_pointer(*fp, f); 534 rcu_assign_pointer(*fp, f);
533 535
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index cf325625c99d..4f1297657c27 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
322 __rsvp_delete_filter(f); 322 __rsvp_delete_filter(f);
323} 323}
324 324
325static void rsvp_destroy(struct tcf_proto *tp) 325static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
326{ 326{
327 struct rsvp_head *data = rtnl_dereference(tp->root); 327 struct rsvp_head *data = rtnl_dereference(tp->root);
328 int h1, h2; 328 int h1, h2;
@@ -350,7 +350,8 @@ static void rsvp_destroy(struct tcf_proto *tp)
350 kfree_rcu(data, rcu); 350 kfree_rcu(data, rcu);
351} 351}
352 352
353static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last) 353static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
354 struct netlink_ext_ack *extack)
354{ 355{
355 struct rsvp_head *head = rtnl_dereference(tp->root); 356 struct rsvp_head *head = rtnl_dereference(tp->root);
356 struct rsvp_filter *nfp, *f = arg; 357 struct rsvp_filter *nfp, *f = arg;
@@ -486,7 +487,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
486 struct tcf_proto *tp, unsigned long base, 487 struct tcf_proto *tp, unsigned long base,
487 u32 handle, 488 u32 handle,
488 struct nlattr **tca, 489 struct nlattr **tca,
489 void **arg, bool ovr) 490 void **arg, bool ovr, struct netlink_ext_ack *extack)
490{ 491{
491 struct rsvp_head *data = rtnl_dereference(tp->root); 492 struct rsvp_head *data = rtnl_dereference(tp->root);
492 struct rsvp_filter *f, *nfp; 493 struct rsvp_filter *f, *nfp;
@@ -511,7 +512,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
511 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); 512 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
512 if (err < 0) 513 if (err < 0)
513 return err; 514 return err;
514 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); 515 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
515 if (err < 0) 516 if (err < 0)
516 goto errout2; 517 goto errout2;
517 518
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 67467ae24c97..b49cc990a000 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -193,7 +193,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
193 tcf_queue_work(&f->work); 193 tcf_queue_work(&f->work);
194} 194}
195 195
196static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) 196static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
197 struct netlink_ext_ack *extack)
197{ 198{
198 struct tcindex_data *p = rtnl_dereference(tp->root); 199 struct tcindex_data *p = rtnl_dereference(tp->root);
199 struct tcindex_filter_result *r = arg; 200 struct tcindex_filter_result *r = arg;
@@ -246,7 +247,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
246{ 247{
247 bool last; 248 bool last;
248 249
249 return tcindex_delete(tp, arg, &last); 250 return tcindex_delete(tp, arg, &last, NULL);
250} 251}
251 252
252static void __tcindex_destroy(struct rcu_head *head) 253static void __tcindex_destroy(struct rcu_head *head)
@@ -322,7 +323,7 @@ static int
322tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, 323tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
323 u32 handle, struct tcindex_data *p, 324 u32 handle, struct tcindex_data *p,
324 struct tcindex_filter_result *r, struct nlattr **tb, 325 struct tcindex_filter_result *r, struct nlattr **tb,
325 struct nlattr *est, bool ovr) 326 struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
326{ 327{
327 struct tcindex_filter_result new_filter_result, *old_r = r; 328 struct tcindex_filter_result new_filter_result, *old_r = r;
328 struct tcindex_filter_result cr; 329 struct tcindex_filter_result cr;
@@ -334,7 +335,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
334 err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); 335 err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
335 if (err < 0) 336 if (err < 0)
336 return err; 337 return err;
337 err = tcf_exts_validate(net, tp, tb, est, &e, ovr); 338 err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
338 if (err < 0) 339 if (err < 0)
339 goto errout; 340 goto errout;
340 341
@@ -520,7 +521,8 @@ errout:
520static int 521static int
521tcindex_change(struct net *net, struct sk_buff *in_skb, 522tcindex_change(struct net *net, struct sk_buff *in_skb,
522 struct tcf_proto *tp, unsigned long base, u32 handle, 523 struct tcf_proto *tp, unsigned long base, u32 handle,
523 struct nlattr **tca, void **arg, bool ovr) 524 struct nlattr **tca, void **arg, bool ovr,
525 struct netlink_ext_ack *extack)
524{ 526{
525 struct nlattr *opt = tca[TCA_OPTIONS]; 527 struct nlattr *opt = tca[TCA_OPTIONS];
526 struct nlattr *tb[TCA_TCINDEX_MAX + 1]; 528 struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -540,7 +542,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
540 return err; 542 return err;
541 543
542 return tcindex_set_parms(net, tp, base, handle, p, r, tb, 544 return tcindex_set_parms(net, tp, base, handle, p, r, tb,
543 tca[TCA_RATE], ovr); 545 tca[TCA_RATE], ovr, extack);
544} 546}
545 547
546static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) 548static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
@@ -579,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
579 } 581 }
580} 582}
581 583
582static void tcindex_destroy(struct tcf_proto *tp) 584static void tcindex_destroy(struct tcf_proto *tp,
585 struct netlink_ext_ack *extack)
583{ 586{
584 struct tcindex_data *p = rtnl_dereference(tp->root); 587 struct tcindex_data *p = rtnl_dereference(tp->root);
585 struct tcf_walker walker; 588 struct tcf_walker walker;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 507859cdd1cb..6c7601a530e3 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -87,6 +87,7 @@ struct tc_u_hnode {
87 unsigned int divisor; 87 unsigned int divisor;
88 struct idr handle_idr; 88 struct idr handle_idr;
89 struct rcu_head rcu; 89 struct rcu_head rcu;
90 u32 flags;
90 /* The 'ht' field MUST be the last field in structure to allow for 91 /* The 'ht' field MUST be the last field in structure to allow for
91 * more entries allocated at end of structure. 92 * more entries allocated at end of structure.
92 */ 93 */
@@ -315,19 +316,13 @@ static void *u32_get(struct tcf_proto *tp, u32 handle)
315 return u32_lookup_key(ht, handle); 316 return u32_lookup_key(ht, handle);
316} 317}
317 318
319/* Protected by rtnl lock */
318static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) 320static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
319{ 321{
320 unsigned long idr_index; 322 int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
321 int err; 323 if (id < 0)
322
323 /* This is only used inside rtnl lock it is safe to increment
324 * without read _copy_ update semantics
325 */
326 err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
327 1, 0x7FF, GFP_KERNEL);
328 if (err)
329 return 0; 324 return 0;
330 return (u32)(idr_index | 0x800) << 20; 325 return (id | 0x800U) << 20;
331} 326}
332 327
333static struct hlist_head *tc_u_common_hash; 328static struct hlist_head *tc_u_common_hash;
@@ -397,10 +392,12 @@ static int u32_init(struct tcf_proto *tp)
397static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, 392static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
398 bool free_pf) 393 bool free_pf)
399{ 394{
395 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
396
400 tcf_exts_destroy(&n->exts); 397 tcf_exts_destroy(&n->exts);
401 tcf_exts_put_net(&n->exts); 398 tcf_exts_put_net(&n->exts);
402 if (n->ht_down) 399 if (ht && --ht->refcnt == 0)
403 n->ht_down->refcnt--; 400 kfree(ht);
404#ifdef CONFIG_CLS_U32_PERF 401#ifdef CONFIG_CLS_U32_PERF
405 if (free_pf) 402 if (free_pf)
406 free_percpu(n->pf); 403 free_percpu(n->pf);
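
u32_destroy_key() previously only decremented ht_down->refcnt and left the free to the hnode delete path; since knodes are destroyed via call_rcu, a key can outlive that path, which the diff suggests could leave the table behind or leave ht_down dangling. Dereferencing ht_down under rtnl and freeing on the final put makes the last reference holder responsible, whichever side it is. The rule in miniature, using the tc_u_hnode struct from cls_u32.c above (illustrative helper name):

#include <linux/slab.h>

static void put_hnode(struct tc_u_hnode *ht)
{
	/* rtnl serializes refcnt here, so a plain integer suffices */
	if (ht && --ht->refcnt == 0)
		kfree(ht);
}
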
@@ -486,12 +483,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
486 return 0; 483 return 0;
487} 484}
488 485
489static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) 486static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
487 struct netlink_ext_ack *extack)
490{ 488{
491 struct tcf_block *block = tp->chain->block; 489 struct tcf_block *block = tp->chain->block;
492 struct tc_cls_u32_offload cls_u32 = {}; 490 struct tc_cls_u32_offload cls_u32 = {};
493 491
494 tc_cls_common_offload_init(&cls_u32.common, tp); 492 tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
495 cls_u32.command = TC_CLSU32_DELETE_HNODE; 493 cls_u32.command = TC_CLSU32_DELETE_HNODE;
496 cls_u32.hnode.divisor = h->divisor; 494 cls_u32.hnode.divisor = h->divisor;
497 cls_u32.hnode.handle = h->handle; 495 cls_u32.hnode.handle = h->handle;
@@ -501,7 +499,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
501} 499}
502 500
503static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 501static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
504 u32 flags) 502 u32 flags, struct netlink_ext_ack *extack)
505{ 503{
506 struct tcf_block *block = tp->chain->block; 504 struct tcf_block *block = tp->chain->block;
507 struct tc_cls_u32_offload cls_u32 = {}; 505 struct tc_cls_u32_offload cls_u32 = {};
@@ -509,7 +507,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
509 bool offloaded = false; 507 bool offloaded = false;
510 int err; 508 int err;
511 509
512 tc_cls_common_offload_init(&cls_u32.common, tp); 510 tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
513 cls_u32.command = TC_CLSU32_NEW_HNODE; 511 cls_u32.command = TC_CLSU32_NEW_HNODE;
514 cls_u32.hnode.divisor = h->divisor; 512 cls_u32.hnode.divisor = h->divisor;
515 cls_u32.hnode.handle = h->handle; 513 cls_u32.hnode.handle = h->handle;
@@ -517,7 +515,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
517 515
518 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); 516 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
519 if (err < 0) { 517 if (err < 0) {
520 u32_clear_hw_hnode(tp, h); 518 u32_clear_hw_hnode(tp, h, NULL);
521 return err; 519 return err;
522 } else if (err > 0) { 520 } else if (err > 0) {
523 offloaded = true; 521 offloaded = true;
@@ -529,27 +527,30 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
529 return 0; 527 return 0;
530} 528}
531 529
532static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) 530static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
531 struct netlink_ext_ack *extack)
533{ 532{
534 struct tcf_block *block = tp->chain->block; 533 struct tcf_block *block = tp->chain->block;
535 struct tc_cls_u32_offload cls_u32 = {}; 534 struct tc_cls_u32_offload cls_u32 = {};
536 535
537 tc_cls_common_offload_init(&cls_u32.common, tp); 536 tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
538 cls_u32.command = TC_CLSU32_DELETE_KNODE; 537 cls_u32.command = TC_CLSU32_DELETE_KNODE;
539 cls_u32.knode.handle = handle; 538 cls_u32.knode.handle = n->handle;
540 539
541 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); 540 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
541 tcf_block_offload_dec(block, &n->flags);
542} 542}
543 543
544static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 544static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
545 u32 flags) 545 u32 flags, struct netlink_ext_ack *extack)
546{ 546{
547 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
547 struct tcf_block *block = tp->chain->block; 548 struct tcf_block *block = tp->chain->block;
548 struct tc_cls_u32_offload cls_u32 = {}; 549 struct tc_cls_u32_offload cls_u32 = {};
549 bool skip_sw = tc_skip_sw(flags); 550 bool skip_sw = tc_skip_sw(flags);
550 int err; 551 int err;
551 552
552 tc_cls_common_offload_init(&cls_u32.common, tp); 553 tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
553 cls_u32.command = TC_CLSU32_REPLACE_KNODE; 554 cls_u32.command = TC_CLSU32_REPLACE_KNODE;
554 cls_u32.knode.handle = n->handle; 555 cls_u32.knode.handle = n->handle;
555 cls_u32.knode.fshift = n->fshift; 556 cls_u32.knode.fshift = n->fshift;
@@ -563,14 +564,14 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
563 cls_u32.knode.sel = &n->sel; 564 cls_u32.knode.sel = &n->sel;
564 cls_u32.knode.exts = &n->exts; 565 cls_u32.knode.exts = &n->exts;
565 if (n->ht_down) 566 if (n->ht_down)
566 cls_u32.knode.link_handle = n->ht_down->handle; 567 cls_u32.knode.link_handle = ht->handle;
567 568
568 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); 569 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
569 if (err < 0) { 570 if (err < 0) {
570 u32_remove_hw_knode(tp, n->handle); 571 u32_remove_hw_knode(tp, n, NULL);
571 return err; 572 return err;
572 } else if (err > 0) { 573 } else if (err > 0) {
573 n->flags |= TCA_CLS_FLAGS_IN_HW; 574 tcf_block_offload_inc(block, &n->flags);
574 } 575 }
575 576
576 if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) 577 if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
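
The hunk above also changes how the in-hardware status is tracked: instead of setting TCA_CLS_FLAGS_IN_HW directly on the knode, u32_replace_hw_knode() calls tcf_block_offload_inc(), and u32_remove_hw_knode() now balances it with tcf_block_offload_dec(). A minimal sketch of that counting pattern, mirroring the helpers this series adds (the offloadcnt field name follows the series; treat it as illustrative):

static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
{
	if (*flags & TCA_CLS_FLAGS_IN_HW)
		return;
	*flags |= TCA_CLS_FLAGS_IN_HW;
	block->offloadcnt++;	/* one more filter resident in hardware */
}

static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
{
	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
		return;
	*flags &= ~TCA_CLS_FLAGS_IN_HW;
	block->offloadcnt--;	/* filter has left hardware */
}
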
@@ -579,7 +580,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
579 return 0; 580 return 0;
580} 581}
581 582
582static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 583static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
584 struct netlink_ext_ack *extack)
583{ 585{
584 struct tc_u_knode *n; 586 struct tc_u_knode *n;
585 unsigned int h; 587 unsigned int h;
@@ -589,8 +591,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
589 RCU_INIT_POINTER(ht->ht[h], 591 RCU_INIT_POINTER(ht->ht[h],
590 rtnl_dereference(n->next)); 592 rtnl_dereference(n->next));
591 tcf_unbind_filter(tp, &n->res); 593 tcf_unbind_filter(tp, &n->res);
592 u32_remove_hw_knode(tp, n->handle); 594 u32_remove_hw_knode(tp, n, extack);
593 idr_remove_ext(&ht->handle_idr, n->handle); 595 idr_remove(&ht->handle_idr, n->handle);
594 if (tcf_exts_get_net(&n->exts)) 596 if (tcf_exts_get_net(&n->exts))
595 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 597 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
596 else 598 else
@@ -599,7 +601,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
599 } 601 }
600} 602}
601 603
602static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 604static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
605 struct netlink_ext_ack *extack)
603{ 606{
604 struct tc_u_common *tp_c = tp->data; 607 struct tc_u_common *tp_c = tp->data;
605 struct tc_u_hnode __rcu **hn; 608 struct tc_u_hnode __rcu **hn;
@@ -607,16 +610,16 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
607 610
608 WARN_ON(ht->refcnt); 611 WARN_ON(ht->refcnt);
609 612
610 u32_clear_hnode(tp, ht); 613 u32_clear_hnode(tp, ht, extack);
611 614
612 hn = &tp_c->hlist; 615 hn = &tp_c->hlist;
613 for (phn = rtnl_dereference(*hn); 616 for (phn = rtnl_dereference(*hn);
614 phn; 617 phn;
615 hn = &phn->next, phn = rtnl_dereference(*hn)) { 618 hn = &phn->next, phn = rtnl_dereference(*hn)) {
616 if (phn == ht) { 619 if (phn == ht) {
617 u32_clear_hw_hnode(tp, ht); 620 u32_clear_hw_hnode(tp, ht, extack);
618 idr_destroy(&ht->handle_idr); 621 idr_destroy(&ht->handle_idr);
619 idr_remove_ext(&tp_c->handle_idr, ht->handle); 622 idr_remove(&tp_c->handle_idr, ht->handle);
620 RCU_INIT_POINTER(*hn, ht->next); 623 RCU_INIT_POINTER(*hn, ht->next);
621 kfree_rcu(ht, rcu); 624 kfree_rcu(ht, rcu);
622 return 0; 625 return 0;
@@ -637,7 +640,7 @@ static bool ht_empty(struct tc_u_hnode *ht)
637 return true; 640 return true;
638} 641}
639 642
640static void u32_destroy(struct tcf_proto *tp) 643static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
641{ 644{
642 struct tc_u_common *tp_c = tp->data; 645 struct tc_u_common *tp_c = tp->data;
643 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 646 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -645,23 +648,22 @@ static void u32_destroy(struct tcf_proto *tp)
645 WARN_ON(root_ht == NULL); 648 WARN_ON(root_ht == NULL);
646 649
647 if (root_ht && --root_ht->refcnt == 0) 650 if (root_ht && --root_ht->refcnt == 0)
648 u32_destroy_hnode(tp, root_ht); 651 u32_destroy_hnode(tp, root_ht, extack);
649 652
650 if (--tp_c->refcnt == 0) { 653 if (--tp_c->refcnt == 0) {
651 struct tc_u_hnode *ht; 654 struct tc_u_hnode *ht;
652 655
653 hlist_del(&tp_c->hnode); 656 hlist_del(&tp_c->hnode);
654 657
655 for (ht = rtnl_dereference(tp_c->hlist);
656 ht;
657 ht = rtnl_dereference(ht->next)) {
658 ht->refcnt--;
659 u32_clear_hnode(tp, ht);
660 }
661
662 while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { 658 while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
659 u32_clear_hnode(tp, ht, extack);
663 RCU_INIT_POINTER(tp_c->hlist, ht->next); 660 RCU_INIT_POINTER(tp_c->hlist, ht->next);
664 kfree_rcu(ht, rcu); 661
662 /* u32_destroy_key() will later free ht for us, if it's
663 * still referenced by some knode
664 */
665 if (--ht->refcnt == 0)
666 kfree_rcu(ht, rcu);
665 } 667 }
666 668
667 idr_destroy(&tp_c->handle_idr); 669 idr_destroy(&tp_c->handle_idr);
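
The rewritten teardown above collapses the old two-pass walk (clear every hnode, then free the list) into one loop that clears each hnode and frees it only when its reference count reaches zero; a knode that still holds ht_down keeps the table alive until u32_destroy_key() drops the last reference. The ownership rule reduces to a sketch like this (hypothetical helper, not part of the patch):

static void example_put_hnode(struct tc_u_hnode *ht)
{
	if (--ht->refcnt == 0)
		kfree_rcu(ht, rcu);	/* last holder frees, after an RCU grace period */
}
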
@@ -671,7 +673,8 @@ static void u32_destroy(struct tcf_proto *tp)
671 tp->data = NULL; 673 tp->data = NULL;
672} 674}
673 675
674static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) 676static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
677 struct netlink_ext_ack *extack)
675{ 678{
676 struct tc_u_hnode *ht = arg; 679 struct tc_u_hnode *ht = arg;
677 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 680 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -682,18 +685,21 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
682 goto out; 685 goto out;
683 686
684 if (TC_U32_KEY(ht->handle)) { 687 if (TC_U32_KEY(ht->handle)) {
685 u32_remove_hw_knode(tp, ht->handle); 688 u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
686 ret = u32_delete_key(tp, (struct tc_u_knode *)ht); 689 ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
687 goto out; 690 goto out;
688 } 691 }
689 692
690 if (root_ht == ht) 693 if (root_ht == ht) {
694 NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
691 return -EINVAL; 695 return -EINVAL;
696 }
692 697
693 if (ht->refcnt == 1) { 698 if (ht->refcnt == 1) {
694 ht->refcnt--; 699 ht->refcnt--;
695 u32_destroy_hnode(tp, ht); 700 u32_destroy_hnode(tp, ht, extack);
696 } else { 701 } else {
702 NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
697 return -EBUSY; 703 return -EBUSY;
698 } 704 }
699 705
@@ -735,19 +741,17 @@ ret:
735 741
736static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) 742static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
737{ 743{
738 unsigned long idr_index; 744 u32 index = htid | 0x800;
739 u32 start = htid | 0x800;
740 u32 max = htid | 0xFFF; 745 u32 max = htid | 0xFFF;
741 u32 min = htid;
742 746
743 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, 747 if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) {
744 start, max + 1, GFP_KERNEL)) { 748 index = htid + 1;
745 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, 749 if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max,
746 min + 1, max + 1, GFP_KERNEL)) 750 GFP_KERNEL))
747 return max; 751 index = max;
748 } 752 }
749 753
750 return (u32)idr_index; 754 return index;
751} 755}
752 756
753static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 757static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
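
gen_new_kid() above also illustrates the IDR API migration running through this tree: idr_alloc_ext() took a half-open [start, end) range and returned the id via an unsigned long out-parameter, while idr_alloc_u32() starts from *id, takes an inclusive max, and writes the allocated id back through the same pointer. A minimal sketch of the new calling convention (the example_ name is illustrative):

#include <linux/idr.h>

static int example_alloc_kid(struct idr *idr, void *ptr, u32 *out)
{
	u32 id = 0x800;		/* first candidate id */
	int err;

	err = idr_alloc_u32(idr, ptr, &id, 0xFFF, GFP_KERNEL);
	if (err)
		return err;	/* typically -ENOSPC when [0x800, 0xFFF] is full */
	*out = id;		/* updated in place on success */
	return 0;
}
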
@@ -764,11 +768,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
764static int u32_set_parms(struct net *net, struct tcf_proto *tp, 768static int u32_set_parms(struct net *net, struct tcf_proto *tp,
765 unsigned long base, struct tc_u_hnode *ht, 769 unsigned long base, struct tc_u_hnode *ht,
766 struct tc_u_knode *n, struct nlattr **tb, 770 struct tc_u_knode *n, struct nlattr **tb,
767 struct nlattr *est, bool ovr) 771 struct nlattr *est, bool ovr,
772 struct netlink_ext_ack *extack)
768{ 773{
769 int err; 774 int err;
770 775
771 err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr); 776 err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
772 if (err < 0) 777 if (err < 0)
773 return err; 778 return err;
774 779
@@ -776,14 +781,18 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
776 u32 handle = nla_get_u32(tb[TCA_U32_LINK]); 781 u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
777 struct tc_u_hnode *ht_down = NULL, *ht_old; 782 struct tc_u_hnode *ht_down = NULL, *ht_old;
778 783
779 if (TC_U32_KEY(handle)) 784 if (TC_U32_KEY(handle)) {
785 NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
780 return -EINVAL; 786 return -EINVAL;
787 }
781 788
782 if (handle) { 789 if (handle) {
783 ht_down = u32_lookup_ht(ht->tp_c, handle); 790 ht_down = u32_lookup_ht(ht->tp_c, handle);
784 791
785 if (ht_down == NULL) 792 if (!ht_down) {
793 NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
786 return -EINVAL; 794 return -EINVAL;
795 }
787 ht_down->refcnt++; 796 ht_down->refcnt++;
788 } 797 }
789 798
@@ -801,7 +810,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
801#ifdef CONFIG_NET_CLS_IND 810#ifdef CONFIG_NET_CLS_IND
802 if (tb[TCA_U32_INDEV]) { 811 if (tb[TCA_U32_INDEV]) {
803 int ret; 812 int ret;
804 ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); 813 ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
805 if (ret < 0) 814 if (ret < 0)
806 return -EINVAL; 815 return -EINVAL;
807 n->ifindex = ret; 816 n->ifindex = ret;
@@ -832,7 +841,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
832 if (pins->handle == n->handle) 841 if (pins->handle == n->handle)
833 break; 842 break;
834 843
835 idr_replace_ext(&ht->handle_idr, n, n->handle); 844 idr_replace(&ht->handle_idr, n, n->handle);
836 RCU_INIT_POINTER(n->next, pins->next); 845 RCU_INIT_POINTER(n->next, pins->next);
837 rcu_assign_pointer(*ins, n); 846 rcu_assign_pointer(*ins, n);
838} 847}
@@ -840,8 +849,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
840static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, 849static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
841 struct tc_u_knode *n) 850 struct tc_u_knode *n)
842{ 851{
843 struct tc_u_knode *new; 852 struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
844 struct tc_u32_sel *s = &n->sel; 853 struct tc_u32_sel *s = &n->sel;
854 struct tc_u_knode *new;
845 855
846 new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), 856 new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
847 GFP_KERNEL); 857 GFP_KERNEL);
@@ -859,11 +869,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
859 new->fshift = n->fshift; 869 new->fshift = n->fshift;
860 new->res = n->res; 870 new->res = n->res;
861 new->flags = n->flags; 871 new->flags = n->flags;
862 RCU_INIT_POINTER(new->ht_down, n->ht_down); 872 RCU_INIT_POINTER(new->ht_down, ht);
863 873
864 /* bump reference count as long as we hold pointer to structure */ 874 /* bump reference count as long as we hold pointer to structure */
865 if (new->ht_down) 875 if (ht)
866 new->ht_down->refcnt++; 876 ht->refcnt++;
867 877
868#ifdef CONFIG_CLS_U32_PERF 878#ifdef CONFIG_CLS_U32_PERF
869 /* Statistics may be incremented by readers during update 879 /* Statistics may be incremented by readers during update
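
u32_init_knode() now reads ht_down once through rtnl_dereference() and reuses the plain pointer. Since ht_down is an __rcu-annotated pointer, sparse requires an accessor; rtnl_dereference() is the right one here because RTNL is held, so no RCU read-side lock is needed. Roughly (illustrative helper only):

static void example_bump_refcnt(struct tc_u_knode *n)
{
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);

	if (ht)
		ht->refcnt++;	/* safe: RTNL serializes all writers */
}
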
@@ -892,7 +902,8 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
892 902
893static int u32_change(struct net *net, struct sk_buff *in_skb, 903static int u32_change(struct net *net, struct sk_buff *in_skb,
894 struct tcf_proto *tp, unsigned long base, u32 handle, 904 struct tcf_proto *tp, unsigned long base, u32 handle,
895 struct nlattr **tca, void **arg, bool ovr) 905 struct nlattr **tca, void **arg, bool ovr,
906 struct netlink_ext_ack *extack)
896{ 907{
897 struct tc_u_common *tp_c = tp->data; 908 struct tc_u_common *tp_c = tp->data;
898 struct tc_u_hnode *ht; 909 struct tc_u_hnode *ht;
@@ -906,28 +917,41 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
906 size_t size; 917 size_t size;
907#endif 918#endif
908 919
909 if (opt == NULL) 920 if (!opt) {
910 return handle ? -EINVAL : 0; 921 if (handle) {
922 NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
923 return -EINVAL;
924 } else {
925 return 0;
926 }
927 }
911 928
912 err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL); 929 err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack);
913 if (err < 0) 930 if (err < 0)
914 return err; 931 return err;
915 932
916 if (tb[TCA_U32_FLAGS]) { 933 if (tb[TCA_U32_FLAGS]) {
917 flags = nla_get_u32(tb[TCA_U32_FLAGS]); 934 flags = nla_get_u32(tb[TCA_U32_FLAGS]);
918 if (!tc_flags_valid(flags)) 935 if (!tc_flags_valid(flags)) {
936 NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
919 return -EINVAL; 937 return -EINVAL;
938 }
920 } 939 }
921 940
922 n = *arg; 941 n = *arg;
923 if (n) { 942 if (n) {
924 struct tc_u_knode *new; 943 struct tc_u_knode *new;
925 944
926 if (TC_U32_KEY(n->handle) == 0) 945 if (TC_U32_KEY(n->handle) == 0) {
946 NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
927 return -EINVAL; 947 return -EINVAL;
948 }
928 949
929 if (n->flags != flags) 950 if ((n->flags ^ flags) &
951 ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
952 NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
930 return -EINVAL; 953 return -EINVAL;
954 }
931 955
932 new = u32_init_knode(tp, n); 956 new = u32_init_knode(tp, n);
933 if (!new) 957 if (!new)
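
The masked comparison above exists because TCA_CLS_FLAGS_IN_HW and TCA_CLS_FLAGS_NOT_IN_HW are status bits the kernel sets on its own; a replace request that omits them must still match a knode that carries them. Expressed as a hypothetical predicate:

static bool example_flags_compatible(u32 old_flags, u32 new_flags)
{
	const u32 status = TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW;

	/* Only the kernel-owned status bits may differ. */
	return !((old_flags ^ new_flags) & ~status);
}
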
@@ -935,14 +959,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
935 959
936 err = u32_set_parms(net, tp, base, 960 err = u32_set_parms(net, tp, base,
937 rtnl_dereference(n->ht_up), new, tb, 961 rtnl_dereference(n->ht_up), new, tb,
938 tca[TCA_RATE], ovr); 962 tca[TCA_RATE], ovr, extack);
939 963
940 if (err) { 964 if (err) {
941 u32_destroy_key(tp, new, false); 965 u32_destroy_key(tp, new, false);
942 return err; 966 return err;
943 } 967 }
944 968
945 err = u32_replace_hw_knode(tp, new, flags); 969 err = u32_replace_hw_knode(tp, new, flags, extack);
946 if (err) { 970 if (err) {
947 u32_destroy_key(tp, new, false); 971 u32_destroy_key(tp, new, false);
948 return err; 972 return err;
@@ -961,10 +985,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
961 if (tb[TCA_U32_DIVISOR]) { 985 if (tb[TCA_U32_DIVISOR]) {
962 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 986 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
963 987
964 if (--divisor > 0x100) 988 if (--divisor > 0x100) {
989 NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
965 return -EINVAL; 990 return -EINVAL;
966 if (TC_U32_KEY(handle)) 991 }
992 if (TC_U32_KEY(handle)) {
993 NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
967 return -EINVAL; 994 return -EINVAL;
995 }
968 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); 996 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
969 if (ht == NULL) 997 if (ht == NULL)
970 return -ENOBUFS; 998 return -ENOBUFS;
@@ -975,8 +1003,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
975 return -ENOMEM; 1003 return -ENOMEM;
976 } 1004 }
977 } else { 1005 } else {
978 err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL, 1006 err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle,
979 handle, handle + 1, GFP_KERNEL); 1007 handle, GFP_KERNEL);
980 if (err) { 1008 if (err) {
981 kfree(ht); 1009 kfree(ht);
982 return err; 1010 return err;
@@ -988,10 +1016,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
988 ht->handle = handle; 1016 ht->handle = handle;
989 ht->prio = tp->prio; 1017 ht->prio = tp->prio;
990 idr_init(&ht->handle_idr); 1018 idr_init(&ht->handle_idr);
1019 ht->flags = flags;
991 1020
992 err = u32_replace_hw_hnode(tp, ht, flags); 1021 err = u32_replace_hw_hnode(tp, ht, flags, extack);
993 if (err) { 1022 if (err) {
994 idr_remove_ext(&tp_c->handle_idr, handle); 1023 idr_remove(&tp_c->handle_idr, handle);
995 kfree(ht); 1024 kfree(ht);
996 return err; 1025 return err;
997 } 1026 }
@@ -1010,23 +1039,28 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
1010 htid = ht->handle; 1039 htid = ht->handle;
1011 } else { 1040 } else {
1012 ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); 1041 ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
1013 if (ht == NULL) 1042 if (!ht) {
1043 NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
1014 return -EINVAL; 1044 return -EINVAL;
1045 }
1015 } 1046 }
1016 } else { 1047 } else {
1017 ht = rtnl_dereference(tp->root); 1048 ht = rtnl_dereference(tp->root);
1018 htid = ht->handle; 1049 htid = ht->handle;
1019 } 1050 }
1020 1051
1021 if (ht->divisor < TC_U32_HASH(htid)) 1052 if (ht->divisor < TC_U32_HASH(htid)) {
1053 NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
1022 return -EINVAL; 1054 return -EINVAL;
1055 }
1023 1056
1024 if (handle) { 1057 if (handle) {
1025 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) 1058 if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
1059 NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
1026 return -EINVAL; 1060 return -EINVAL;
1061 }
1027 handle = htid | TC_U32_NODE(handle); 1062 handle = htid | TC_U32_NODE(handle);
1028 err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, 1063 err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
1029 handle, handle + 1,
1030 GFP_KERNEL); 1064 GFP_KERNEL);
1031 if (err) 1065 if (err)
1032 return err; 1066 return err;
@@ -1034,6 +1068,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
1034 handle = gen_new_kid(ht, htid); 1068 handle = gen_new_kid(ht, htid);
1035 1069
1036 if (tb[TCA_U32_SEL] == NULL) { 1070 if (tb[TCA_U32_SEL] == NULL) {
1071 NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
1037 err = -EINVAL; 1072 err = -EINVAL;
1038 goto erridr; 1073 goto erridr;
1039 } 1074 }
@@ -1082,12 +1117,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
1082 } 1117 }
1083#endif 1118#endif
1084 1119
1085 err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); 1120 err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
1121 extack);
1086 if (err == 0) { 1122 if (err == 0) {
1087 struct tc_u_knode __rcu **ins; 1123 struct tc_u_knode __rcu **ins;
1088 struct tc_u_knode *pins; 1124 struct tc_u_knode *pins;
1089 1125
1090 err = u32_replace_hw_knode(tp, n, flags); 1126 err = u32_replace_hw_knode(tp, n, flags, extack);
1091 if (err) 1127 if (err)
1092 goto errhw; 1128 goto errhw;
1093 1129
@@ -1119,7 +1155,7 @@ errfree:
1119#endif 1155#endif
1120 kfree(n); 1156 kfree(n);
1121erridr: 1157erridr:
1122 idr_remove_ext(&ht->handle_idr, handle); 1158 idr_remove(&ht->handle_idr, handle);
1123 return err; 1159 return err;
1124} 1160}
1125 1161
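
Taken together, the cls_u32.c changes follow a single pattern: each failure path gains an NL_SET_ERR_MSG_MOD() call, which records a module-prefixed string in the caller-supplied netlink_ext_ack so userspace tc can print it alongside the errno; the macro itself does not alter control flow. A minimal sketch of the pattern (the function name is illustrative):

#include <linux/netlink.h>
#include <linux/pkt_cls.h>

static int example_check_handle(u32 handle, struct netlink_ext_ack *extack)
{
	if (TC_U32_KEY(handle)) {
		/* The message is advisory; the errno fails the request. */
		NL_SET_ERR_MSG_MOD(extack, "Handle must name a hash table");
		return -EINVAL;
	}
	return 0;
}
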
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 52529b7f8d96..d512f49ee83c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -393,13 +393,16 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
393static struct qdisc_rate_table *qdisc_rtab_list; 393static struct qdisc_rate_table *qdisc_rtab_list;
394 394
395struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, 395struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
396 struct nlattr *tab) 396 struct nlattr *tab,
397 struct netlink_ext_ack *extack)
397{ 398{
398 struct qdisc_rate_table *rtab; 399 struct qdisc_rate_table *rtab;
399 400
400 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || 401 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
401 nla_len(tab) != TC_RTAB_SIZE) 402 nla_len(tab) != TC_RTAB_SIZE) {
403 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
402 return NULL; 404 return NULL;
405 }
403 406
404 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { 407 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
405 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && 408 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
@@ -418,6 +421,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
418 r->linklayer = __detect_linklayer(r, rtab->data); 421 r->linklayer = __detect_linklayer(r, rtab->data);
419 rtab->next = qdisc_rtab_list; 422 rtab->next = qdisc_rtab_list;
420 qdisc_rtab_list = rtab; 423 qdisc_rtab_list = rtab;
424 } else {
425 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
421 } 426 }
422 return rtab; 427 return rtab;
423} 428}
@@ -449,7 +454,8 @@ static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
449 [TCA_STAB_DATA] = { .type = NLA_BINARY }, 454 [TCA_STAB_DATA] = { .type = NLA_BINARY },
450}; 455};
451 456
452static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) 457static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
458 struct netlink_ext_ack *extack)
453{ 459{
454 struct nlattr *tb[TCA_STAB_MAX + 1]; 460 struct nlattr *tb[TCA_STAB_MAX + 1];
455 struct qdisc_size_table *stab; 461 struct qdisc_size_table *stab;
@@ -458,23 +464,29 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
458 u16 *tab = NULL; 464 u16 *tab = NULL;
459 int err; 465 int err;
460 466
461 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL); 467 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
462 if (err < 0) 468 if (err < 0)
463 return ERR_PTR(err); 469 return ERR_PTR(err);
464 if (!tb[TCA_STAB_BASE]) 470 if (!tb[TCA_STAB_BASE]) {
471 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
465 return ERR_PTR(-EINVAL); 472 return ERR_PTR(-EINVAL);
473 }
466 474
467 s = nla_data(tb[TCA_STAB_BASE]); 475 s = nla_data(tb[TCA_STAB_BASE]);
468 476
469 if (s->tsize > 0) { 477 if (s->tsize > 0) {
470 if (!tb[TCA_STAB_DATA]) 478 if (!tb[TCA_STAB_DATA]) {
479 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
471 return ERR_PTR(-EINVAL); 480 return ERR_PTR(-EINVAL);
481 }
472 tab = nla_data(tb[TCA_STAB_DATA]); 482 tab = nla_data(tb[TCA_STAB_DATA]);
473 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 483 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
474 } 484 }
475 485
476 if (tsize != s->tsize || (!tab && tsize > 0)) 486 if (tsize != s->tsize || (!tab && tsize > 0)) {
487 NL_SET_ERR_MSG(extack, "Invalid size of size table");
477 return ERR_PTR(-EINVAL); 488 return ERR_PTR(-EINVAL);
489 }
478 490
479 list_for_each_entry(stab, &qdisc_stab_list, list) { 491 list_for_each_entry(stab, &qdisc_stab_list, list) {
480 if (memcmp(&stab->szopts, s, sizeof(*s))) 492 if (memcmp(&stab->szopts, s, sizeof(*s)))
@@ -669,7 +681,7 @@ int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
669 unsigned int size = 4; 681 unsigned int size = 4;
670 682
671 clhash->hash = qdisc_class_hash_alloc(size); 683 clhash->hash = qdisc_class_hash_alloc(size);
672 if (clhash->hash == NULL) 684 if (!clhash->hash)
673 return -ENOMEM; 685 return -ENOMEM;
674 clhash->hashsize = size; 686 clhash->hashsize = size;
675 clhash->hashmask = size - 1; 687 clhash->hashmask = size - 1;
@@ -779,6 +791,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
779 unsigned char *b = skb_tail_pointer(skb); 791 unsigned char *b = skb_tail_pointer(skb);
780 struct gnet_dump d; 792 struct gnet_dump d;
781 struct qdisc_size_table *stab; 793 struct qdisc_size_table *stab;
794 u32 block_index;
782 __u32 qlen; 795 __u32 qlen;
783 796
784 cond_resched(); 797 cond_resched();
@@ -795,11 +808,23 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
795 tcm->tcm_info = refcount_read(&q->refcnt); 808 tcm->tcm_info = refcount_read(&q->refcnt);
796 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 809 if (nla_put_string(skb, TCA_KIND, q->ops->id))
797 goto nla_put_failure; 810 goto nla_put_failure;
798 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) 811 if (q->ops->ingress_block_get) {
799 goto nla_put_failure; 812 block_index = q->ops->ingress_block_get(q);
813 if (block_index &&
814 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
815 goto nla_put_failure;
816 }
817 if (q->ops->egress_block_get) {
818 block_index = q->ops->egress_block_get(q);
819 if (block_index &&
820 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
821 goto nla_put_failure;
822 }
800 if (q->ops->dump && q->ops->dump(q, skb) < 0) 823 if (q->ops->dump && q->ops->dump(q, skb) < 0)
801 goto nla_put_failure; 824 goto nla_put_failure;
802 qlen = q->q.qlen; 825 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
826 goto nla_put_failure;
827 qlen = qdisc_qlen_sum(q);
803 828
804 stab = rtnl_dereference(q->stab); 829 stab = rtnl_dereference(q->stab);
805 if (stab && qdisc_dump_stab(skb, stab) < 0) 830 if (stab && qdisc_dump_stab(skb, stab) < 0)
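
tc_fill_qdisc() above only emits TCA_INGRESS_BLOCK / TCA_EGRESS_BLOCK when the qdisc provides the matching *_block_get() op and the returned index is non-zero. On the qdisc side the op pair is a thin accessor around private state, roughly like this (struct layout assumed for illustration):

struct example_sched_data {
	u32 ingress_block;	/* shared block index, 0 means unset */
};

static void example_ingress_block_set(struct Qdisc *sch, u32 block_index)
{
	struct example_sched_data *q = qdisc_priv(sch);

	q->ingress_block = block_index;
}

static u32 example_ingress_block_get(struct Qdisc *sch)
{
	struct example_sched_data *q = qdisc_priv(sch);

	return q->ingress_block;
}
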
@@ -898,7 +923,8 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
898 923
899static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, 924static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
900 struct sk_buff *skb, struct nlmsghdr *n, u32 classid, 925 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
901 struct Qdisc *new, struct Qdisc *old) 926 struct Qdisc *new, struct Qdisc *old,
927 struct netlink_ext_ack *extack)
902{ 928{
903 struct Qdisc *q = old; 929 struct Qdisc *q = old;
904 struct net *net = dev_net(dev); 930 struct net *net = dev_net(dev);
@@ -913,8 +939,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
913 (new && new->flags & TCQ_F_INGRESS)) { 939 (new && new->flags & TCQ_F_INGRESS)) {
914 num_q = 1; 940 num_q = 1;
915 ingress = 1; 941 ingress = 1;
916 if (!dev_ingress_queue(dev)) 942 if (!dev_ingress_queue(dev)) {
943 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
917 return -ENOENT; 944 return -ENOENT;
945 }
918 } 946 }
919 947
920 if (dev->flags & IFF_UP) 948 if (dev->flags & IFF_UP)
@@ -956,14 +984,22 @@ skip:
956 } else { 984 } else {
957 const struct Qdisc_class_ops *cops = parent->ops->cl_ops; 985 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
958 986
987 /* Only support running class lockless if parent is lockless */
988 if (new && (new->flags & TCQ_F_NOLOCK) &&
989 parent && !(parent->flags & TCQ_F_NOLOCK))
990 new->flags &= ~TCQ_F_NOLOCK;
991
959 err = -EOPNOTSUPP; 992 err = -EOPNOTSUPP;
960 if (cops && cops->graft) { 993 if (cops && cops->graft) {
961 unsigned long cl = cops->find(parent, classid); 994 unsigned long cl = cops->find(parent, classid);
962 995
963 if (cl) 996 if (cl) {
964 err = cops->graft(parent, cl, new, &old); 997 err = cops->graft(parent, cl, new, &old,
965 else 998 extack);
999 } else {
1000 NL_SET_ERR_MSG(extack, "Specified class not found");
966 err = -ENOENT; 1001 err = -ENOENT;
1002 }
967 } 1003 }
968 if (!err) 1004 if (!err)
969 notify_and_destroy(net, skb, n, classid, old, new); 1005 notify_and_destroy(net, skb, n, classid, old, new);
@@ -971,6 +1007,40 @@ skip:
971 return err; 1007 return err;
972} 1008}
973 1009
1010static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1011 struct netlink_ext_ack *extack)
1012{
1013 u32 block_index;
1014
1015 if (tca[TCA_INGRESS_BLOCK]) {
1016 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1017
1018 if (!block_index) {
1019 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1020 return -EINVAL;
1021 }
1022 if (!sch->ops->ingress_block_set) {
1023 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1024 return -EOPNOTSUPP;
1025 }
1026 sch->ops->ingress_block_set(sch, block_index);
1027 }
1028 if (tca[TCA_EGRESS_BLOCK]) {
1029 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1030
1031 if (!block_index) {
1032 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1033 return -EINVAL;
1034 }
1035 if (!sch->ops->egress_block_set) {
1036 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1037 return -EOPNOTSUPP;
1038 }
1039 sch->ops->egress_block_set(sch, block_index);
1040 }
1041 return 0;
1042}
1043
974/* lockdep annotation is needed for ingress; egress gets it only for name */ 1044/* lockdep annotation is needed for ingress; egress gets it only for name */
975static struct lock_class_key qdisc_tx_lock; 1045static struct lock_class_key qdisc_tx_lock;
976static struct lock_class_key qdisc_rx_lock; 1046static struct lock_class_key qdisc_rx_lock;
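
qdisc_block_indexes_set() validates both attributes up front: a zero index fails with -EINVAL and a qdisc without the matching *_block_set() op fails with -EOPNOTSUPP, each leaving a message in extack. In the qdisc_create() hunk further down it runs before ops->init(), so an init routine can already rely on the indices being set. Sketched (illustrative wrapper):

static int example_setup(struct Qdisc *sch, struct nlattr **tca,
			 struct netlink_ext_ack *extack)
{
	int err;

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		return err;	/* reason already recorded in extack */

	if (sch->ops->init)
		return sch->ops->init(sch, tca[TCA_OPTIONS], extack);
	return 0;
}
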
@@ -984,7 +1054,8 @@ static struct lock_class_key qdisc_rx_lock;
984static struct Qdisc *qdisc_create(struct net_device *dev, 1054static struct Qdisc *qdisc_create(struct net_device *dev,
985 struct netdev_queue *dev_queue, 1055 struct netdev_queue *dev_queue,
986 struct Qdisc *p, u32 parent, u32 handle, 1056 struct Qdisc *p, u32 parent, u32 handle,
987 struct nlattr **tca, int *errp) 1057 struct nlattr **tca, int *errp,
1058 struct netlink_ext_ack *extack)
988{ 1059{
989 int err; 1060 int err;
990 struct nlattr *kind = tca[TCA_KIND]; 1061 struct nlattr *kind = tca[TCA_KIND];
@@ -1022,10 +1093,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
1022#endif 1093#endif
1023 1094
1024 err = -ENOENT; 1095 err = -ENOENT;
1025 if (ops == NULL) 1096 if (!ops) {
1097 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1026 goto err_out; 1098 goto err_out;
1099 }
1027 1100
1028 sch = qdisc_alloc(dev_queue, ops); 1101 sch = qdisc_alloc(dev_queue, ops, extack);
1029 if (IS_ERR(sch)) { 1102 if (IS_ERR(sch)) {
1030 err = PTR_ERR(sch); 1103 err = PTR_ERR(sch);
1031 goto err_out2; 1104 goto err_out2;
@@ -1062,43 +1135,57 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
1062 netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); 1135 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1063 } 1136 }
1064 1137
1065 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { 1138 err = qdisc_block_indexes_set(sch, tca, extack);
1066 if (tca[TCA_STAB]) { 1139 if (err)
1067 stab = qdisc_get_stab(tca[TCA_STAB]); 1140 goto err_out3;
1068 if (IS_ERR(stab)) { 1141
1069 err = PTR_ERR(stab); 1142 if (ops->init) {
1070 goto err_out4; 1143 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1071 } 1144 if (err != 0)
1072 rcu_assign_pointer(sch->stab, stab); 1145 goto err_out5;
1073 } 1146 }
1074 if (tca[TCA_RATE]) { 1147
1075 seqcount_t *running; 1148 if (tca[TCA_STAB]) {
1076 1149 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1077 err = -EOPNOTSUPP; 1150 if (IS_ERR(stab)) {
1078 if (sch->flags & TCQ_F_MQROOT) 1151 err = PTR_ERR(stab);
1079 goto err_out4; 1152 goto err_out4;
1080
1081 if ((sch->parent != TC_H_ROOT) &&
1082 !(sch->flags & TCQ_F_INGRESS) &&
1083 (!p || !(p->flags & TCQ_F_MQROOT)))
1084 running = qdisc_root_sleeping_running(sch);
1085 else
1086 running = &sch->running;
1087
1088 err = gen_new_estimator(&sch->bstats,
1089 sch->cpu_bstats,
1090 &sch->rate_est,
1091 NULL,
1092 running,
1093 tca[TCA_RATE]);
1094 if (err)
1095 goto err_out4;
1096 } 1153 }
1154 rcu_assign_pointer(sch->stab, stab);
1155 }
1156 if (tca[TCA_RATE]) {
1157 seqcount_t *running;
1097 1158
1098 qdisc_hash_add(sch, false); 1159 err = -EOPNOTSUPP;
1160 if (sch->flags & TCQ_F_MQROOT) {
1161 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1162 goto err_out4;
1163 }
1099 1164
1100 return sch; 1165 if (sch->parent != TC_H_ROOT &&
1166 !(sch->flags & TCQ_F_INGRESS) &&
1167 (!p || !(p->flags & TCQ_F_MQROOT)))
1168 running = qdisc_root_sleeping_running(sch);
1169 else
1170 running = &sch->running;
1171
1172 err = gen_new_estimator(&sch->bstats,
1173 sch->cpu_bstats,
1174 &sch->rate_est,
1175 NULL,
1176 running,
1177 tca[TCA_RATE]);
1178 if (err) {
1179 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1180 goto err_out4;
1181 }
1101 } 1182 }
1183
1184 qdisc_hash_add(sch, false);
1185
1186 return sch;
1187
1188err_out5:
1102 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ 1189 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1103 if (ops->destroy) 1190 if (ops->destroy)
1104 ops->destroy(sch); 1191 ops->destroy(sch);
@@ -1122,21 +1209,28 @@ err_out4:
1122 goto err_out3; 1209 goto err_out3;
1123} 1210}
1124 1211
1125static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) 1212static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1213 struct netlink_ext_ack *extack)
1126{ 1214{
1127 struct qdisc_size_table *ostab, *stab = NULL; 1215 struct qdisc_size_table *ostab, *stab = NULL;
1128 int err = 0; 1216 int err = 0;
1129 1217
1130 if (tca[TCA_OPTIONS]) { 1218 if (tca[TCA_OPTIONS]) {
1131 if (sch->ops->change == NULL) 1219 if (!sch->ops->change) {
1220 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1132 return -EINVAL; 1221 return -EINVAL;
1133 err = sch->ops->change(sch, tca[TCA_OPTIONS]); 1222 }
1223 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1224 NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1225 return -EOPNOTSUPP;
1226 }
1227 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1134 if (err) 1228 if (err)
1135 return err; 1229 return err;
1136 } 1230 }
1137 1231
1138 if (tca[TCA_STAB]) { 1232 if (tca[TCA_STAB]) {
1139 stab = qdisc_get_stab(tca[TCA_STAB]); 1233 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1140 if (IS_ERR(stab)) 1234 if (IS_ERR(stab))
1141 return PTR_ERR(stab); 1235 return PTR_ERR(stab);
1142 } 1236 }
@@ -1234,8 +1328,10 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1234 if (clid != TC_H_ROOT) { 1328 if (clid != TC_H_ROOT) {
1235 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 1329 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1236 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1330 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1237 if (!p) 1331 if (!p) {
1332 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1238 return -ENOENT; 1333 return -ENOENT;
1334 }
1239 q = qdisc_leaf(p, clid); 1335 q = qdisc_leaf(p, clid);
1240 } else if (dev_ingress_queue(dev)) { 1336 } else if (dev_ingress_queue(dev)) {
1241 q = dev_ingress_queue(dev)->qdisc_sleeping; 1337 q = dev_ingress_queue(dev)->qdisc_sleeping;
@@ -1243,26 +1339,38 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1243 } else { 1339 } else {
1244 q = dev->qdisc; 1340 q = dev->qdisc;
1245 } 1341 }
1246 if (!q) 1342 if (!q) {
1343 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1247 return -ENOENT; 1344 return -ENOENT;
1345 }
1248 1346
1249 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) 1347 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1348 NL_SET_ERR_MSG(extack, "Invalid handle");
1250 return -EINVAL; 1349 return -EINVAL;
1350 }
1251 } else { 1351 } else {
1252 q = qdisc_lookup(dev, tcm->tcm_handle); 1352 q = qdisc_lookup(dev, tcm->tcm_handle);
1253 if (!q) 1353 if (!q) {
1354 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1254 return -ENOENT; 1355 return -ENOENT;
1356 }
1255 } 1357 }
1256 1358
1257 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1359 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1360 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1258 return -EINVAL; 1361 return -EINVAL;
1362 }
1259 1363
1260 if (n->nlmsg_type == RTM_DELQDISC) { 1364 if (n->nlmsg_type == RTM_DELQDISC) {
1261 if (!clid) 1365 if (!clid) {
1366 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1262 return -EINVAL; 1367 return -EINVAL;
1263 if (q->handle == 0) 1368 }
1369 if (q->handle == 0) {
1370 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1264 return -ENOENT; 1371 return -ENOENT;
1265 err = qdisc_graft(dev, p, skb, n, clid, NULL, q); 1372 }
1373 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1266 if (err != 0) 1374 if (err != 0)
1267 return err; 1375 return err;
1268 } else { 1376 } else {
@@ -1308,8 +1416,10 @@ replay:
1308 if (clid != TC_H_ROOT) { 1416 if (clid != TC_H_ROOT) {
1309 if (clid != TC_H_INGRESS) { 1417 if (clid != TC_H_INGRESS) {
1310 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1418 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1311 if (!p) 1419 if (!p) {
1420 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1312 return -ENOENT; 1421 return -ENOENT;
1422 }
1313 q = qdisc_leaf(p, clid); 1423 q = qdisc_leaf(p, clid);
1314 } else if (dev_ingress_queue_create(dev)) { 1424 } else if (dev_ingress_queue_create(dev)) {
1315 q = dev_ingress_queue(dev)->qdisc_sleeping; 1425 q = dev_ingress_queue(dev)->qdisc_sleeping;
@@ -1324,20 +1434,31 @@ replay:
1324 1434
1325 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1435 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1326 if (tcm->tcm_handle) { 1436 if (tcm->tcm_handle) {
1327 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) 1437 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1438 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1328 return -EEXIST; 1439 return -EEXIST;
1329 if (TC_H_MIN(tcm->tcm_handle)) 1440 }
1441 if (TC_H_MIN(tcm->tcm_handle)) {
1442 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1330 return -EINVAL; 1443 return -EINVAL;
1444 }
1331 q = qdisc_lookup(dev, tcm->tcm_handle); 1445 q = qdisc_lookup(dev, tcm->tcm_handle);
1332 if (!q) 1446 if (!q)
1333 goto create_n_graft; 1447 goto create_n_graft;
1334 if (n->nlmsg_flags & NLM_F_EXCL) 1448 if (n->nlmsg_flags & NLM_F_EXCL) {
1449 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1335 return -EEXIST; 1450 return -EEXIST;
1336 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1451 }
1452 if (tca[TCA_KIND] &&
1453 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1454 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1337 return -EINVAL; 1455 return -EINVAL;
1456 }
1338 if (q == p || 1457 if (q == p ||
1339 (p && check_loop(q, p, 0))) 1458 (p && check_loop(q, p, 0))) {
1459 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1340 return -ELOOP; 1460 return -ELOOP;
1461 }
1341 qdisc_refcount_inc(q); 1462 qdisc_refcount_inc(q);
1342 goto graft; 1463 goto graft;
1343 } else { 1464 } else {
@@ -1372,33 +1493,45 @@ replay:
1372 } 1493 }
1373 } 1494 }
1374 } else { 1495 } else {
1375 if (!tcm->tcm_handle) 1496 if (!tcm->tcm_handle) {
1497 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1376 return -EINVAL; 1498 return -EINVAL;
1499 }
1377 q = qdisc_lookup(dev, tcm->tcm_handle); 1500 q = qdisc_lookup(dev, tcm->tcm_handle);
1378 } 1501 }
1379 1502
1380 /* Change qdisc parameters */ 1503 /* Change qdisc parameters */
1381 if (q == NULL) 1504 if (!q) {
1505 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1382 return -ENOENT; 1506 return -ENOENT;
1383 if (n->nlmsg_flags & NLM_F_EXCL) 1507 }
1508 if (n->nlmsg_flags & NLM_F_EXCL) {
1509 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1384 return -EEXIST; 1510 return -EEXIST;
1385 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1511 }
1512 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1513 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1386 return -EINVAL; 1514 return -EINVAL;
1387 err = qdisc_change(q, tca); 1515 }
1516 err = qdisc_change(q, tca, extack);
1388 if (err == 0) 1517 if (err == 0)
1389 qdisc_notify(net, skb, n, clid, NULL, q); 1518 qdisc_notify(net, skb, n, clid, NULL, q);
1390 return err; 1519 return err;
1391 1520
1392create_n_graft: 1521create_n_graft:
1393 if (!(n->nlmsg_flags & NLM_F_CREATE)) 1522 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1523 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1394 return -ENOENT; 1524 return -ENOENT;
1525 }
1395 if (clid == TC_H_INGRESS) { 1526 if (clid == TC_H_INGRESS) {
1396 if (dev_ingress_queue(dev)) 1527 if (dev_ingress_queue(dev)) {
1397 q = qdisc_create(dev, dev_ingress_queue(dev), p, 1528 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1398 tcm->tcm_parent, tcm->tcm_parent, 1529 tcm->tcm_parent, tcm->tcm_parent,
1399 tca, &err); 1530 tca, &err, extack);
1400 else 1531 } else {
1532 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1401 err = -ENOENT; 1533 err = -ENOENT;
1534 }
1402 } else { 1535 } else {
1403 struct netdev_queue *dev_queue; 1536 struct netdev_queue *dev_queue;
1404 1537
@@ -1411,7 +1544,7 @@ create_n_graft:
1411 1544
1412 q = qdisc_create(dev, dev_queue, p, 1545 q = qdisc_create(dev, dev_queue, p,
1413 tcm->tcm_parent, tcm->tcm_handle, 1546 tcm->tcm_parent, tcm->tcm_handle,
1414 tca, &err); 1547 tca, &err, extack);
1415 } 1548 }
1416 if (q == NULL) { 1549 if (q == NULL) {
1417 if (err == -EAGAIN) 1550 if (err == -EAGAIN)
@@ -1420,7 +1553,7 @@ create_n_graft:
1420 } 1553 }
1421 1554
1422graft: 1555graft:
1423 err = qdisc_graft(dev, p, skb, n, clid, q, NULL); 1556 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1424 if (err) { 1557 if (err) {
1425 if (q) 1558 if (q)
1426 qdisc_destroy(q); 1559 qdisc_destroy(q);
@@ -1672,7 +1805,7 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1672 cl = cops->find(q, portid); 1805 cl = cops->find(q, portid);
1673 if (!cl) 1806 if (!cl)
1674 return; 1807 return;
1675 block = cops->tcf_block(q, cl); 1808 block = cops->tcf_block(q, cl, NULL);
1676 if (!block) 1809 if (!block)
1677 return; 1810 return;
1678 list_for_each_entry(chain, &block->chain_list, list) { 1811 list_for_each_entry(chain, &block->chain_list, list) {
@@ -1816,10 +1949,15 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1816 } 1949 }
1817 } 1950 }
1818 1951
1952 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1953 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
1954 return -EOPNOTSUPP;
1955 }
1956
1819 new_cl = cl; 1957 new_cl = cl;
1820 err = -EOPNOTSUPP; 1958 err = -EOPNOTSUPP;
1821 if (cops->change) 1959 if (cops->change)
1822 err = cops->change(q, clid, portid, tca, &new_cl); 1960 err = cops->change(q, clid, portid, tca, &new_cl, extack);
1823 if (err == 0) { 1961 if (err == 0) {
1824 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); 1962 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1825 /* We just create a new class, need to do reverse binding. */ 1963 /* We just create a new class, need to do reverse binding. */
@@ -1955,7 +2093,6 @@ static int psched_open(struct inode *inode, struct file *file)
1955} 2093}
1956 2094
1957static const struct file_operations psched_fops = { 2095static const struct file_operations psched_fops = {
1958 .owner = THIS_MODULE,
1959 .open = psched_open, 2096 .open = psched_open,
1960 .read = seq_read, 2097 .read = seq_read,
1961 .llseek = seq_lseek, 2098 .llseek = seq_lseek,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2dbd249c0b2f..cd49afca9617 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -82,7 +82,8 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
82} 82}
83 83
84static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, 84static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
85 struct Qdisc *new, struct Qdisc **old) 85 struct Qdisc *new, struct Qdisc **old,
86 struct netlink_ext_ack *extack)
86{ 87{
87 struct atm_qdisc_data *p = qdisc_priv(sch); 88 struct atm_qdisc_data *p = qdisc_priv(sch);
88 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 89 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
@@ -191,7 +192,8 @@ static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
191}; 192};
192 193
193static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, 194static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
194 struct nlattr **tca, unsigned long *arg) 195 struct nlattr **tca, unsigned long *arg,
196 struct netlink_ext_ack *extack)
195{ 197{
196 struct atm_qdisc_data *p = qdisc_priv(sch); 198 struct atm_qdisc_data *p = qdisc_priv(sch);
197 struct atm_flow_data *flow = (struct atm_flow_data *)*arg; 199 struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
@@ -281,13 +283,15 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
281 goto err_out; 283 goto err_out;
282 } 284 }
283 285
284 error = tcf_block_get(&flow->block, &flow->filter_list, sch); 286 error = tcf_block_get(&flow->block, &flow->filter_list, sch,
287 extack);
285 if (error) { 288 if (error) {
286 kfree(flow); 289 kfree(flow);
287 goto err_out; 290 goto err_out;
288 } 291 }
289 292
290 flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); 293 flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
294 extack);
291 if (!flow->q) 295 if (!flow->q)
292 flow->q = &noop_qdisc; 296 flow->q = &noop_qdisc;
293 pr_debug("atm_tc_change: qdisc %p\n", flow->q); 297 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -356,7 +360,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
356 } 360 }
357} 361}
358 362
359static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl) 363static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
364 struct netlink_ext_ack *extack)
360{ 365{
361 struct atm_qdisc_data *p = qdisc_priv(sch); 366 struct atm_qdisc_data *p = qdisc_priv(sch);
362 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 367 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
@@ -531,7 +536,8 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
531 return p->link.q->ops->peek(p->link.q); 536 return p->link.q->ops->peek(p->link.q);
532} 537}
533 538
534static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) 539static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
540 struct netlink_ext_ack *extack)
535{ 541{
536 struct atm_qdisc_data *p = qdisc_priv(sch); 542 struct atm_qdisc_data *p = qdisc_priv(sch);
537 int err; 543 int err;
@@ -541,12 +547,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
541 INIT_LIST_HEAD(&p->link.list); 547 INIT_LIST_HEAD(&p->link.list);
542 list_add(&p->link.list, &p->flows); 548 list_add(&p->link.list, &p->flows);
543 p->link.q = qdisc_create_dflt(sch->dev_queue, 549 p->link.q = qdisc_create_dflt(sch->dev_queue,
544 &pfifo_qdisc_ops, sch->handle); 550 &pfifo_qdisc_ops, sch->handle, extack);
545 if (!p->link.q) 551 if (!p->link.q)
546 p->link.q = &noop_qdisc; 552 p->link.q = &noop_qdisc;
547 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); 553 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
548 554
549 err = tcf_block_get(&p->link.block, &p->link.filter_list, sch); 555 err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
556 extack);
550 if (err) 557 if (err)
551 return err; 558 return err;
552 559
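
The sch_atm.c hunks show the shape every qdisc conversion in this series takes: tcf_block_get() and qdisc_create_dflt() grow a trailing extack argument, and the init/graft/change/tcf_block callbacks thread it through. A sketch of a converted init, with example_ names standing in for a real qdisc's private data:

struct example_sched_data {
	struct tcf_block *block;
	struct tcf_proto __rcu *filter_list;
	struct Qdisc *qdisc;
};

static int example_init(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct example_sched_data *q = qdisc_priv(sch);
	int err;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;	/* failure reason lands in extack */

	q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     sch->handle, extack);
	if (!q->qdisc)
		q->qdisc = &noop_qdisc;	/* same fallback sch_atm uses */
	return 0;
}
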
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 525eb3a6d625..f42025d53cfe 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1132,7 +1132,8 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1132 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, 1132 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1133}; 1133};
1134 1134
1135static int cbq_init(struct Qdisc *sch, struct nlattr *opt) 1135static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
1136 struct netlink_ext_ack *extack)
1136{ 1137{
1137 struct cbq_sched_data *q = qdisc_priv(sch); 1138 struct cbq_sched_data *q = qdisc_priv(sch);
1138 struct nlattr *tb[TCA_CBQ_MAX + 1]; 1139 struct nlattr *tb[TCA_CBQ_MAX + 1];
@@ -1143,22 +1144,27 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1143 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); 1144 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
1144 q->delay_timer.function = cbq_undelay; 1145 q->delay_timer.function = cbq_undelay;
1145 1146
1146 if (!opt) 1147 if (!opt) {
1148 NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
1147 return -EINVAL; 1149 return -EINVAL;
1150 }
1148 1151
1149 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); 1152 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack);
1150 if (err < 0) 1153 if (err < 0)
1151 return err; 1154 return err;
1152 1155
1153 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL) 1156 if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
1157 NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
1154 return -EINVAL; 1158 return -EINVAL;
1159 }
1155 1160
1156 r = nla_data(tb[TCA_CBQ_RATE]); 1161 r = nla_data(tb[TCA_CBQ_RATE]);
1157 1162
1158 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) 1163 q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
1164 if (!q->link.R_tab)
1159 return -EINVAL; 1165 return -EINVAL;
1160 1166
1161 err = tcf_block_get(&q->link.block, &q->link.filter_list, sch); 1167 err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
1162 if (err) 1168 if (err)
1163 goto put_rtab; 1169 goto put_rtab;
1164 1170
@@ -1170,7 +1176,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1170 q->link.common.classid = sch->handle; 1176 q->link.common.classid = sch->handle;
1171 q->link.qdisc = sch; 1177 q->link.qdisc = sch;
1172 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1178 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1173 sch->handle); 1179 sch->handle, NULL);
1174 if (!q->link.q) 1180 if (!q->link.q)
1175 q->link.q = &noop_qdisc; 1181 q->link.q = &noop_qdisc;
1176 else 1182 else
@@ -1369,13 +1375,13 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1369} 1375}
1370 1376
1371static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1377static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1372 struct Qdisc **old) 1378 struct Qdisc **old, struct netlink_ext_ack *extack)
1373{ 1379{
1374 struct cbq_class *cl = (struct cbq_class *)arg; 1380 struct cbq_class *cl = (struct cbq_class *)arg;
1375 1381
1376 if (new == NULL) { 1382 if (new == NULL) {
1377 new = qdisc_create_dflt(sch->dev_queue, 1383 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1378 &pfifo_qdisc_ops, cl->common.classid); 1384 cl->common.classid, extack);
1379 if (new == NULL) 1385 if (new == NULL)
1380 return -ENOBUFS; 1386 return -ENOBUFS;
1381 } 1387 }
@@ -1450,7 +1456,7 @@ static void cbq_destroy(struct Qdisc *sch)
1450 1456
1451static int 1457static int
1452cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, 1458cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1453 unsigned long *arg) 1459 unsigned long *arg, struct netlink_ext_ack *extack)
1454{ 1460{
1455 int err; 1461 int err;
1456 struct cbq_sched_data *q = qdisc_priv(sch); 1462 struct cbq_sched_data *q = qdisc_priv(sch);
@@ -1460,29 +1466,37 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1460 struct cbq_class *parent; 1466 struct cbq_class *parent;
1461 struct qdisc_rate_table *rtab = NULL; 1467 struct qdisc_rate_table *rtab = NULL;
1462 1468
1463 if (opt == NULL) 1469 if (!opt) {
1470 NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing");
1464 return -EINVAL; 1471 return -EINVAL;
1472 }
1465 1473
1466 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); 1474 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack);
1467 if (err < 0) 1475 if (err < 0)
1468 return err; 1476 return err;
1469 1477
1470 if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) 1478 if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
1479 NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
1471 return -EOPNOTSUPP; 1480 return -EOPNOTSUPP;
1481 }
1472 1482
1473 if (cl) { 1483 if (cl) {
1474 /* Check parent */ 1484 /* Check parent */
1475 if (parentid) { 1485 if (parentid) {
1476 if (cl->tparent && 1486 if (cl->tparent &&
1477 cl->tparent->common.classid != parentid) 1487 cl->tparent->common.classid != parentid) {
1488 NL_SET_ERR_MSG(extack, "Invalid parent id");
1478 return -EINVAL; 1489 return -EINVAL;
1479 if (!cl->tparent && parentid != TC_H_ROOT) 1490 }
1491 if (!cl->tparent && parentid != TC_H_ROOT) {
1492 NL_SET_ERR_MSG(extack, "Parent must be root");
1480 return -EINVAL; 1493 return -EINVAL;
1494 }
1481 } 1495 }
1482 1496
1483 if (tb[TCA_CBQ_RATE]) { 1497 if (tb[TCA_CBQ_RATE]) {
1484 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), 1498 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1485 tb[TCA_CBQ_RTAB]); 1499 tb[TCA_CBQ_RTAB], extack);
1486 if (rtab == NULL) 1500 if (rtab == NULL)
1487 return -EINVAL; 1501 return -EINVAL;
1488 } 1502 }
@@ -1494,6 +1508,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1494 qdisc_root_sleeping_running(sch), 1508 qdisc_root_sleeping_running(sch),
1495 tca[TCA_RATE]); 1509 tca[TCA_RATE]);
1496 if (err) { 1510 if (err) {
1511 NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
1497 qdisc_put_rtab(rtab); 1512 qdisc_put_rtab(rtab);
1498 return err; 1513 return err;
1499 } 1514 }
@@ -1532,19 +1547,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1532 if (parentid == TC_H_ROOT) 1547 if (parentid == TC_H_ROOT)
1533 return -EINVAL; 1548 return -EINVAL;
1534 1549
1535 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL || 1550 if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
1536 tb[TCA_CBQ_LSSOPT] == NULL) 1551 NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing");
1537 return -EINVAL; 1552 return -EINVAL;
1553 }
1538 1554
1539 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); 1555 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
1556 extack);
1540 if (rtab == NULL) 1557 if (rtab == NULL)
1541 return -EINVAL; 1558 return -EINVAL;
1542 1559
1543 if (classid) { 1560 if (classid) {
1544 err = -EINVAL; 1561 err = -EINVAL;
1545 if (TC_H_MAJ(classid ^ sch->handle) || 1562 if (TC_H_MAJ(classid ^ sch->handle) ||
1546 cbq_class_lookup(q, classid)) 1563 cbq_class_lookup(q, classid)) {
1564 NL_SET_ERR_MSG(extack, "Specified class not found");
1547 goto failure; 1565 goto failure;
1566 }
1548 } else { 1567 } else {
1549 int i; 1568 int i;
1550 classid = TC_H_MAKE(sch->handle, 0x8000); 1569 classid = TC_H_MAKE(sch->handle, 0x8000);
@@ -1556,8 +1575,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1556 break; 1575 break;
1557 } 1576 }
1558 err = -ENOSR; 1577 err = -ENOSR;
1559 if (i >= 0x8000) 1578 if (i >= 0x8000) {
1579 NL_SET_ERR_MSG(extack, "Unable to generate classid");
1560 goto failure; 1580 goto failure;
1581 }
1561 classid = classid|q->hgenerator; 1582 classid = classid|q->hgenerator;
1562 } 1583 }
1563 1584
@@ -1565,8 +1586,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1565 if (parentid) { 1586 if (parentid) {
1566 parent = cbq_class_lookup(q, parentid); 1587 parent = cbq_class_lookup(q, parentid);
1567 err = -EINVAL; 1588 err = -EINVAL;
1568 if (parent == NULL) 1589 if (!parent) {
1590 NL_SET_ERR_MSG(extack, "Failed to find parentid");
1569 goto failure; 1591 goto failure;
1592 }
1570 } 1593 }
1571 1594
1572 err = -ENOBUFS; 1595 err = -ENOBUFS;
@@ -1574,7 +1597,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1574 if (cl == NULL) 1597 if (cl == NULL)
1575 goto failure; 1598 goto failure;
1576 1599
1577 err = tcf_block_get(&cl->block, &cl->filter_list, sch); 1600 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1578 if (err) { 1601 if (err) {
1579 kfree(cl); 1602 kfree(cl);
1580 return err; 1603 return err;
@@ -1586,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1586 qdisc_root_sleeping_running(sch), 1609 qdisc_root_sleeping_running(sch),
1587 tca[TCA_RATE]); 1610 tca[TCA_RATE]);
1588 if (err) { 1611 if (err) {
1612 NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
1589 tcf_block_put(cl->block); 1613 tcf_block_put(cl->block);
1590 kfree(cl); 1614 kfree(cl);
1591 goto failure; 1615 goto failure;
@@ -1594,7 +1618,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1594 1618
1595 cl->R_tab = rtab; 1619 cl->R_tab = rtab;
1596 rtab = NULL; 1620 rtab = NULL;
1597 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); 1621 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
1622 NULL);
1598 if (!cl->q) 1623 if (!cl->q)
1599 cl->q = &noop_qdisc; 1624 cl->q = &noop_qdisc;
1600 else 1625 else
@@ -1678,7 +1703,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1678 return 0; 1703 return 0;
1679} 1704}
1680 1705
1681static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg) 1706static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
1707 struct netlink_ext_ack *extack)
1682{ 1708{
1683 struct cbq_sched_data *q = qdisc_priv(sch); 1709 struct cbq_sched_data *q = qdisc_priv(sch);
1684 struct cbq_class *cl = (struct cbq_class *)arg; 1710 struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 7a72980c1509..cdd96b9a27bc 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -219,14 +219,17 @@ static void cbs_disable_offload(struct net_device *dev,
219} 219}
220 220
221static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, 221static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
222 const struct tc_cbs_qopt *opt) 222 const struct tc_cbs_qopt *opt,
223 struct netlink_ext_ack *extack)
223{ 224{
224 const struct net_device_ops *ops = dev->netdev_ops; 225 const struct net_device_ops *ops = dev->netdev_ops;
225 struct tc_cbs_qopt_offload cbs = { }; 226 struct tc_cbs_qopt_offload cbs = { };
226 int err; 227 int err;
227 228
228 if (!ops->ndo_setup_tc) 229 if (!ops->ndo_setup_tc) {
230 NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload");
229 return -EOPNOTSUPP; 231 return -EOPNOTSUPP;
232 }
230 233
231 cbs.queue = q->queue; 234 cbs.queue = q->queue;
232 235
@@ -237,8 +240,10 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
237 cbs.sendslope = opt->sendslope; 240 cbs.sendslope = opt->sendslope;
238 241
239 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); 242 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
240 if (err < 0) 243 if (err < 0) {
244 NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload");
241 return err; 245 return err;
246 }
242 247
243 q->enqueue = cbs_enqueue_offload; 248 q->enqueue = cbs_enqueue_offload;
244 q->dequeue = cbs_dequeue_offload; 249 q->dequeue = cbs_dequeue_offload;
@@ -246,7 +251,8 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
246 return 0; 251 return 0;
247} 252}
248 253
249static int cbs_change(struct Qdisc *sch, struct nlattr *opt) 254static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
255 struct netlink_ext_ack *extack)
250{ 256{
251 struct cbs_sched_data *q = qdisc_priv(sch); 257 struct cbs_sched_data *q = qdisc_priv(sch);
252 struct net_device *dev = qdisc_dev(sch); 258 struct net_device *dev = qdisc_dev(sch);
@@ -254,12 +260,14 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
254 struct tc_cbs_qopt *qopt; 260 struct tc_cbs_qopt *qopt;
255 int err; 261 int err;
256 262
257 err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); 263 err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack);
258 if (err < 0) 264 if (err < 0)
259 return err; 265 return err;
260 266
261 if (!tb[TCA_CBS_PARMS]) 267 if (!tb[TCA_CBS_PARMS]) {
268 NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory");
262 return -EINVAL; 269 return -EINVAL;
270 }
263 271
264 qopt = nla_data(tb[TCA_CBS_PARMS]); 272 qopt = nla_data(tb[TCA_CBS_PARMS]);
265 273
@@ -276,7 +284,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
276 284
277 cbs_disable_offload(dev, q); 285 cbs_disable_offload(dev, q);
278 } else { 286 } else {
279 err = cbs_enable_offload(dev, q, qopt); 287 err = cbs_enable_offload(dev, q, qopt, extack);
280 if (err < 0) 288 if (err < 0)
281 return err; 289 return err;
282 } 290 }
@@ -291,13 +299,16 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
291 return 0; 299 return 0;
292} 300}
293 301
294static int cbs_init(struct Qdisc *sch, struct nlattr *opt) 302static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
303 struct netlink_ext_ack *extack)
295{ 304{
296 struct cbs_sched_data *q = qdisc_priv(sch); 305 struct cbs_sched_data *q = qdisc_priv(sch);
297 struct net_device *dev = qdisc_dev(sch); 306 struct net_device *dev = qdisc_dev(sch);
298 307
299 if (!opt) 308 if (!opt) {
309 NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
300 return -EINVAL; 310 return -EINVAL;
311 }
301 312
302 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); 313 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
303 314
@@ -306,7 +317,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
306 317
307 qdisc_watchdog_init(&q->watchdog, sch); 318 qdisc_watchdog_init(&q->watchdog, sch);
308 319
309 return cbs_change(sch, opt); 320 return cbs_change(sch, opt, extack);
310} 321}
311 322
312static void cbs_destroy(struct Qdisc *sch) 323static void cbs_destroy(struct Qdisc *sch)
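cbs_enable_offload() above is the canonical offload handshake: check that the driver implements ndo_setup_tc(), hand it a tc_cbs_qopt_offload, and surface either failure through the extack. Condensed into one illustrative wrapper (check_cbs_offload() is hypothetical, not from the patch):

static int check_cbs_offload(struct net_device *dev,
                             struct tc_cbs_qopt_offload *cbs,
                             struct netlink_ext_ack *extack)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        int err;

        if (!ops->ndo_setup_tc) {
                NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload");
                return -EOPNOTSUPP;
        }

        err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, cbs);
        if (err < 0)
                NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload");
        return err;
}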
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 531250fceb9e..eafc0d17d174 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -344,7 +344,8 @@ static void choke_free(void *addr)
344 kvfree(addr); 344 kvfree(addr);
345} 345}
346 346
347static int choke_change(struct Qdisc *sch, struct nlattr *opt) 347static int choke_change(struct Qdisc *sch, struct nlattr *opt,
348 struct netlink_ext_ack *extack)
348{ 349{
349 struct choke_sched_data *q = qdisc_priv(sch); 350 struct choke_sched_data *q = qdisc_priv(sch);
350 struct nlattr *tb[TCA_CHOKE_MAX + 1]; 351 struct nlattr *tb[TCA_CHOKE_MAX + 1];
@@ -431,9 +432,10 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
431 return 0; 432 return 0;
432} 433}
433 434
434static int choke_init(struct Qdisc *sch, struct nlattr *opt) 435static int choke_init(struct Qdisc *sch, struct nlattr *opt,
436 struct netlink_ext_ack *extack)
435{ 437{
436 return choke_change(sch, opt); 438 return choke_change(sch, opt, extack);
437} 439}
438 440
439static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) 441static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
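choke shows the simplest shape of the conversion: .init and .change share one implementation, so the new extack parameter is just threaded through. In the abstract (the foo_* names are illustrative, not from the patch):

static int foo_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack);

static int foo_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        /* One parser for initial and updated configuration; any
         * NL_SET_ERR_MSG() set inside foo_change() reaches the
         * caller of either entry point.
         */
        return foo_change(sch, opt, extack);
}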
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index c518a1efcb9d..17cd81f84b5d 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -130,7 +130,8 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
130 [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, 130 [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
131}; 131};
132 132
133static int codel_change(struct Qdisc *sch, struct nlattr *opt) 133static int codel_change(struct Qdisc *sch, struct nlattr *opt,
134 struct netlink_ext_ack *extack)
134{ 135{
135 struct codel_sched_data *q = qdisc_priv(sch); 136 struct codel_sched_data *q = qdisc_priv(sch);
136 struct nlattr *tb[TCA_CODEL_MAX + 1]; 137 struct nlattr *tb[TCA_CODEL_MAX + 1];
@@ -184,7 +185,8 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
184 return 0; 185 return 0;
185} 186}
186 187
187static int codel_init(struct Qdisc *sch, struct nlattr *opt) 188static int codel_init(struct Qdisc *sch, struct nlattr *opt,
189 struct netlink_ext_ack *extack)
188{ 190{
189 struct codel_sched_data *q = qdisc_priv(sch); 191 struct codel_sched_data *q = qdisc_priv(sch);
190 192
@@ -196,7 +198,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt)
196 q->params.mtu = psched_mtu(qdisc_dev(sch)); 198 q->params.mtu = psched_mtu(qdisc_dev(sch));
197 199
198 if (opt) { 200 if (opt) {
199 int err = codel_change(sch, opt); 201 int err = codel_change(sch, opt, extack);
200 202
201 if (err) 203 if (err)
202 return err; 204 return err;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 5bbcef3dcd8c..e0b0cf8a9939 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -64,7 +64,8 @@ static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
64}; 64};
65 65
66static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 66static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
67 struct nlattr **tca, unsigned long *arg) 67 struct nlattr **tca, unsigned long *arg,
68 struct netlink_ext_ack *extack)
68{ 69{
69 struct drr_sched *q = qdisc_priv(sch); 70 struct drr_sched *q = qdisc_priv(sch);
70 struct drr_class *cl = (struct drr_class *)*arg; 71 struct drr_class *cl = (struct drr_class *)*arg;
@@ -73,17 +74,21 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
73 u32 quantum; 74 u32 quantum;
74 int err; 75 int err;
75 76
76 if (!opt) 77 if (!opt) {
78 NL_SET_ERR_MSG(extack, "DRR options are required for this operation");
77 return -EINVAL; 79 return -EINVAL;
80 }
78 81
79 err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL); 82 err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack);
80 if (err < 0) 83 if (err < 0)
81 return err; 84 return err;
82 85
83 if (tb[TCA_DRR_QUANTUM]) { 86 if (tb[TCA_DRR_QUANTUM]) {
84 quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); 87 quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
85 if (quantum == 0) 88 if (quantum == 0) {
89 NL_SET_ERR_MSG(extack, "Specified DRR quantum cannot be zero");
86 return -EINVAL; 90 return -EINVAL;
91 }
87 } else 92 } else
88 quantum = psched_mtu(qdisc_dev(sch)); 93 quantum = psched_mtu(qdisc_dev(sch));
89 94
@@ -94,8 +99,10 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
94 NULL, 99 NULL,
95 qdisc_root_sleeping_running(sch), 100 qdisc_root_sleeping_running(sch),
96 tca[TCA_RATE]); 101 tca[TCA_RATE]);
97 if (err) 102 if (err) {
103 NL_SET_ERR_MSG(extack, "Failed to replace estimator");
98 return err; 104 return err;
105 }
99 } 106 }
100 107
101 sch_tree_lock(sch); 108 sch_tree_lock(sch);
@@ -113,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
113 cl->common.classid = classid; 120 cl->common.classid = classid;
114 cl->quantum = quantum; 121 cl->quantum = quantum;
115 cl->qdisc = qdisc_create_dflt(sch->dev_queue, 122 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
116 &pfifo_qdisc_ops, classid); 123 &pfifo_qdisc_ops, classid,
124 NULL);
117 if (cl->qdisc == NULL) 125 if (cl->qdisc == NULL)
118 cl->qdisc = &noop_qdisc; 126 cl->qdisc = &noop_qdisc;
119 else 127 else
@@ -125,6 +133,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
125 qdisc_root_sleeping_running(sch), 133 qdisc_root_sleeping_running(sch),
126 tca[TCA_RATE]); 134 tca[TCA_RATE]);
127 if (err) { 135 if (err) {
136 NL_SET_ERR_MSG(extack, "Failed to replace estimator");
128 qdisc_destroy(cl->qdisc); 137 qdisc_destroy(cl->qdisc);
129 kfree(cl); 138 kfree(cl);
130 return err; 139 return err;
@@ -172,12 +181,15 @@ static unsigned long drr_search_class(struct Qdisc *sch, u32 classid)
172 return (unsigned long)drr_find_class(sch, classid); 181 return (unsigned long)drr_find_class(sch, classid);
173} 182}
174 183
175static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl) 184static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl,
185 struct netlink_ext_ack *extack)
176{ 186{
177 struct drr_sched *q = qdisc_priv(sch); 187 struct drr_sched *q = qdisc_priv(sch);
178 188
179 if (cl) 189 if (cl) {
190 NL_SET_ERR_MSG(extack, "DRR classid must be zero");
180 return NULL; 191 return NULL;
192 }
181 193
182 return q->block; 194 return q->block;
183} 195}
@@ -201,13 +213,14 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
201} 213}
202 214
203static int drr_graft_class(struct Qdisc *sch, unsigned long arg, 215static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
204 struct Qdisc *new, struct Qdisc **old) 216 struct Qdisc *new, struct Qdisc **old,
217 struct netlink_ext_ack *extack)
205{ 218{
206 struct drr_class *cl = (struct drr_class *)arg; 219 struct drr_class *cl = (struct drr_class *)arg;
207 220
208 if (new == NULL) { 221 if (new == NULL) {
209 new = qdisc_create_dflt(sch->dev_queue, 222 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
210 &pfifo_qdisc_ops, cl->common.classid); 223 cl->common.classid, NULL);
211 if (new == NULL) 224 if (new == NULL)
212 new = &noop_qdisc; 225 new = &noop_qdisc;
213 } 226 }
@@ -408,12 +421,13 @@ out:
408 return NULL; 421 return NULL;
409} 422}
410 423
411static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) 424static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
425 struct netlink_ext_ack *extack)
412{ 426{
413 struct drr_sched *q = qdisc_priv(sch); 427 struct drr_sched *q = qdisc_priv(sch);
414 int err; 428 int err;
415 429
416 err = tcf_block_get(&q->block, &q->filter_list, sch); 430 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
417 if (err) 431 if (err)
418 return err; 432 return err;
419 err = qdisc_class_hash_init(&q->clhash); 433 err = qdisc_class_hash_init(&q->clhash);
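The drr conversion shows both halves of the reporting: nla_parse_nested() now receives the extack, so policy violations are explained by the netlink core itself, while semantic checks such as the zero-quantum test add their own message. A condensed sketch, with drr_parse_quantum() as a hypothetical helper:

static int drr_parse_quantum(struct Qdisc *sch, struct nlattr *opt,
                             u32 *quantum, struct netlink_ext_ack *extack)
{
        struct nlattr *tb[TCA_DRR_MAX + 1];
        int err;

        /* Attribute-level errors are reported by the parser. */
        err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack);
        if (err < 0)
                return err;

        if (tb[TCA_DRR_QUANTUM]) {
                *quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
                if (*quantum == 0) {
                        NL_SET_ERR_MSG(extack, "Specified DRR quantum cannot be zero");
                        return -EINVAL;
                }
        } else {
                *quantum = psched_mtu(qdisc_dev(sch));
        }
        return 0;
}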
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index fb4fb71c68cf..049714c57075 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -61,7 +61,8 @@ static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
61/* ------------------------- Class/flow operations ------------------------- */ 61/* ------------------------- Class/flow operations ------------------------- */
62 62
63static int dsmark_graft(struct Qdisc *sch, unsigned long arg, 63static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
64 struct Qdisc *new, struct Qdisc **old) 64 struct Qdisc *new, struct Qdisc **old,
65 struct netlink_ext_ack *extack)
65{ 66{
66 struct dsmark_qdisc_data *p = qdisc_priv(sch); 67 struct dsmark_qdisc_data *p = qdisc_priv(sch);
67 68
@@ -70,7 +71,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
70 71
71 if (new == NULL) { 72 if (new == NULL) {
72 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 73 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
73 sch->handle); 74 sch->handle, NULL);
74 if (new == NULL) 75 if (new == NULL)
75 new = &noop_qdisc; 76 new = &noop_qdisc;
76 } 77 }
@@ -112,7 +113,8 @@ static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
112}; 113};
113 114
114static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 115static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
115 struct nlattr **tca, unsigned long *arg) 116 struct nlattr **tca, unsigned long *arg,
117 struct netlink_ext_ack *extack)
116{ 118{
117 struct dsmark_qdisc_data *p = qdisc_priv(sch); 119 struct dsmark_qdisc_data *p = qdisc_priv(sch);
118 struct nlattr *opt = tca[TCA_OPTIONS]; 120 struct nlattr *opt = tca[TCA_OPTIONS];
@@ -184,7 +186,8 @@ ignore:
184 } 186 }
185} 187}
186 188
187static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl) 189static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
190 struct netlink_ext_ack *extack)
188{ 191{
189 struct dsmark_qdisc_data *p = qdisc_priv(sch); 192 struct dsmark_qdisc_data *p = qdisc_priv(sch);
190 193
@@ -330,7 +333,8 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
330 return p->q->ops->peek(p->q); 333 return p->q->ops->peek(p->q);
331} 334}
332 335
333static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) 336static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
337 struct netlink_ext_ack *extack)
334{ 338{
335 struct dsmark_qdisc_data *p = qdisc_priv(sch); 339 struct dsmark_qdisc_data *p = qdisc_priv(sch);
336 struct nlattr *tb[TCA_DSMARK_MAX + 1]; 340 struct nlattr *tb[TCA_DSMARK_MAX + 1];
@@ -344,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
344 if (!opt) 348 if (!opt)
345 goto errout; 349 goto errout;
346 350
347 err = tcf_block_get(&p->block, &p->filter_list, sch); 351 err = tcf_block_get(&p->block, &p->filter_list, sch, extack);
348 if (err) 352 if (err)
349 return err; 353 return err;
350 354
@@ -377,7 +381,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
377 p->default_index = default_index; 381 p->default_index = default_index;
378 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); 382 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
379 383
380 p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); 384 p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle,
385 NULL);
381 if (p->q == NULL) 386 if (p->q == NULL)
382 p->q = &noop_qdisc; 387 p->q = &noop_qdisc;
383 else 388 else
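Note the NULL passed to qdisc_create_dflt() in the dsmark hunks above: when a qdisc builds its default pfifo child there is no netlink request in flight, hence no extack to fill in. A hypothetical helper capturing that convention:

static struct Qdisc *attach_default_pfifo(struct Qdisc *sch, u32 handle)
{
        struct Qdisc *q;

        /* No extack: this allocation happens outside a netlink
         * transaction, so an extended error has nowhere to go.
         */
        q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, handle,
                              NULL);
        return q ? q : &noop_qdisc;
}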
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 1e37247656f8..24893d3b5d22 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -55,7 +55,8 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
55 return NET_XMIT_CN; 55 return NET_XMIT_CN;
56} 56}
57 57
58static int fifo_init(struct Qdisc *sch, struct nlattr *opt) 58static int fifo_init(struct Qdisc *sch, struct nlattr *opt,
59 struct netlink_ext_ack *extack)
59{ 60{
60 bool bypass; 61 bool bypass;
61 bool is_bfifo = sch->ops == &bfifo_qdisc_ops; 62 bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
@@ -157,7 +158,7 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
157 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); 158 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
158 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; 159 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
159 160
160 ret = q->ops->change(q, nla); 161 ret = q->ops->change(q, nla, NULL);
161 kfree(nla); 162 kfree(nla);
162 } 163 }
163 return ret; 164 return ret;
@@ -165,12 +166,14 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
165EXPORT_SYMBOL(fifo_set_limit); 166EXPORT_SYMBOL(fifo_set_limit);
166 167
167struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, 168struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
168 unsigned int limit) 169 unsigned int limit,
170 struct netlink_ext_ack *extack)
169{ 171{
170 struct Qdisc *q; 172 struct Qdisc *q;
171 int err = -ENOMEM; 173 int err = -ENOMEM;
172 174
173 q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1)); 175 q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1),
176 extack);
174 if (q) { 177 if (q) {
175 err = fifo_set_limit(q, limit); 178 err = fifo_set_limit(q, limit);
176 if (err < 0) { 179 if (err < 0) {
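fifo_create_dflt() grows an extack so parents that build an internal fifo (sch_tbf, for example) can forward their caller's ack. A sketch of the calling convention, with attach_bfifo() as a hypothetical wrapper:

static int attach_bfifo(struct Qdisc *sch, unsigned int limit,
                        struct Qdisc **child,
                        struct netlink_ext_ack *extack)
{
        struct Qdisc *q = fifo_create_dflt(sch, &bfifo_qdisc_ops, limit,
                                           extack);

        if (IS_ERR(q))
                return PTR_ERR(q);      /* message, if any, already set */
        *child = q;
        return 0;
}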
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 263d16e3219e..a366e4c9413a 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -685,7 +685,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
685 [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, 685 [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
686}; 686};
687 687
688static int fq_change(struct Qdisc *sch, struct nlattr *opt) 688static int fq_change(struct Qdisc *sch, struct nlattr *opt,
689 struct netlink_ext_ack *extack)
689{ 690{
690 struct fq_sched_data *q = qdisc_priv(sch); 691 struct fq_sched_data *q = qdisc_priv(sch);
691 struct nlattr *tb[TCA_FQ_MAX + 1]; 692 struct nlattr *tb[TCA_FQ_MAX + 1];
@@ -788,7 +789,8 @@ static void fq_destroy(struct Qdisc *sch)
788 qdisc_watchdog_cancel(&q->watchdog); 789 qdisc_watchdog_cancel(&q->watchdog);
789} 790}
790 791
791static int fq_init(struct Qdisc *sch, struct nlattr *opt) 792static int fq_init(struct Qdisc *sch, struct nlattr *opt,
793 struct netlink_ext_ack *extack)
792{ 794{
793 struct fq_sched_data *q = qdisc_priv(sch); 795 struct fq_sched_data *q = qdisc_priv(sch);
794 int err; 796 int err;
@@ -811,7 +813,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
811 qdisc_watchdog_init(&q->watchdog, sch); 813 qdisc_watchdog_init(&q->watchdog, sch);
812 814
813 if (opt) 815 if (opt)
814 err = fq_change(sch, opt); 816 err = fq_change(sch, opt, extack);
815 else 817 else
816 err = fq_resize(sch, q->fq_trees_log); 818 err = fq_resize(sch, q->fq_trees_log);
817 819
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 0305d791ea94..22fa13cf5d8b 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -377,7 +377,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
377 [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, 377 [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
378}; 378};
379 379
380static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) 380static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
381 struct netlink_ext_ack *extack)
381{ 382{
382 struct fq_codel_sched_data *q = qdisc_priv(sch); 383 struct fq_codel_sched_data *q = qdisc_priv(sch);
383 struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; 384 struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
@@ -458,7 +459,8 @@ static void fq_codel_destroy(struct Qdisc *sch)
458 kvfree(q->flows); 459 kvfree(q->flows);
459} 460}
460 461
461static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) 462static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
463 struct netlink_ext_ack *extack)
462{ 464{
463 struct fq_codel_sched_data *q = qdisc_priv(sch); 465 struct fq_codel_sched_data *q = qdisc_priv(sch);
464 int i; 466 int i;
@@ -477,12 +479,12 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
477 q->cparams.mtu = psched_mtu(qdisc_dev(sch)); 479 q->cparams.mtu = psched_mtu(qdisc_dev(sch));
478 480
479 if (opt) { 481 if (opt) {
480 int err = fq_codel_change(sch, opt); 482 int err = fq_codel_change(sch, opt, extack);
481 if (err) 483 if (err)
482 return err; 484 return err;
483 } 485 }
484 486
485 err = tcf_block_get(&q->block, &q->filter_list, sch); 487 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
486 if (err) 488 if (err)
487 return err; 489 return err;
488 490
@@ -595,7 +597,8 @@ static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
595{ 597{
596} 598}
597 599
598static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl) 600static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl,
601 struct netlink_ext_ack *extack)
599{ 602{
600 struct fq_codel_sched_data *q = qdisc_priv(sch); 603 struct fq_codel_sched_data *q = qdisc_priv(sch);
601 604
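fq_codel also converts its classifier attachment: tcf_block_get() now takes the extack, so filter-block setup failures can explain themselves instead of returning a bare errno. The overall init shape, condensed (foo_* is a stand-in mirroring fq_codel_init(), not the literal code):

static int foo_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct foo_sched_data *q = qdisc_priv(sch);
        int err;

        if (opt) {
                err = foo_change(sch, opt, extack);
                if (err)
                        return err;
        }

        /* The cls API fills the extack itself on failure. */
        return tcf_block_get(&q->block, &q->filter_list, sch, extack);
}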
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cac003fddf3e..190570f21b20 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,11 +26,13 @@
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/if_vlan.h> 28#include <linux/if_vlan.h>
29#include <linux/skb_array.h>
29#include <linux/if_macvlan.h> 30#include <linux/if_macvlan.h>
30#include <net/sch_generic.h> 31#include <net/sch_generic.h>
31#include <net/pkt_sched.h> 32#include <net/pkt_sched.h>
32#include <net/dst.h> 33#include <net/dst.h>
33#include <trace/events/qdisc.h> 34#include <trace/events/qdisc.h>
35#include <net/xfrm.h>
34 36
35/* Qdisc to use by default */ 37/* Qdisc to use by default */
36const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; 38const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@ -47,17 +49,115 @@ EXPORT_SYMBOL(default_qdisc_ops);
47 * - updates to tree and tree walking are only done under the rtnl mutex. 49 * - updates to tree and tree walking are only done under the rtnl mutex.
48 */ 50 */
49 51
50static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) 52static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
53{
54 const struct netdev_queue *txq = q->dev_queue;
55 spinlock_t *lock = NULL;
56 struct sk_buff *skb;
57
58 if (q->flags & TCQ_F_NOLOCK) {
59 lock = qdisc_lock(q);
60 spin_lock(lock);
61 }
62
63 skb = skb_peek(&q->skb_bad_txq);
64 if (skb) {
65 /* check the reason of requeuing without tx lock first */
66 txq = skb_get_tx_queue(txq->dev, skb);
67 if (!netif_xmit_frozen_or_stopped(txq)) {
68 skb = __skb_dequeue(&q->skb_bad_txq);
69 if (qdisc_is_percpu_stats(q)) {
70 qdisc_qstats_cpu_backlog_dec(q, skb);
71 qdisc_qstats_cpu_qlen_dec(q);
72 } else {
73 qdisc_qstats_backlog_dec(q, skb);
74 q->q.qlen--;
75 }
76 } else {
77 skb = NULL;
78 }
79 }
80
81 if (lock)
82 spin_unlock(lock);
83
84 return skb;
85}
86
87static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
51{ 88{
52 q->gso_skb = skb; 89 struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
53 q->qstats.requeues++; 90
54 qdisc_qstats_backlog_inc(q, skb); 91 if (unlikely(skb))
55 q->q.qlen++; /* it's still part of the queue */ 92 skb = __skb_dequeue_bad_txq(q);
93
94 return skb;
95}
96
97static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
98 struct sk_buff *skb)
99{
100 spinlock_t *lock = NULL;
101
102 if (q->flags & TCQ_F_NOLOCK) {
103 lock = qdisc_lock(q);
104 spin_lock(lock);
105 }
106
107 __skb_queue_tail(&q->skb_bad_txq, skb);
108
109 if (lock)
110 spin_unlock(lock);
111}
112
113static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
114{
115 while (skb) {
116 struct sk_buff *next = skb->next;
117
118 __skb_queue_tail(&q->gso_skb, skb);
119 q->qstats.requeues++;
120 qdisc_qstats_backlog_inc(q, skb);
121 q->q.qlen++; /* it's still part of the queue */
122
123 skb = next;
124 }
125 __netif_schedule(q);
126
127 return 0;
128}
129
130static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
131{
132 spinlock_t *lock = qdisc_lock(q);
133
134 spin_lock(lock);
135 while (skb) {
136 struct sk_buff *next = skb->next;
137
138 __skb_queue_tail(&q->gso_skb, skb);
139
140 qdisc_qstats_cpu_requeues_inc(q);
141 qdisc_qstats_cpu_backlog_inc(q, skb);
142 qdisc_qstats_cpu_qlen_inc(q);
143
144 skb = next;
145 }
146 spin_unlock(lock);
147
56 __netif_schedule(q); 148 __netif_schedule(q);
57 149
58 return 0; 150 return 0;
59} 151}
60 152
153static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
154{
155 if (q->flags & TCQ_F_NOLOCK)
156 return dev_requeue_skb_locked(skb, q);
157 else
158 return __dev_requeue_skb(skb, q);
159}
160
61static void try_bulk_dequeue_skb(struct Qdisc *q, 161static void try_bulk_dequeue_skb(struct Qdisc *q,
62 struct sk_buff *skb, 162 struct sk_buff *skb,
63 const struct netdev_queue *txq, 163 const struct netdev_queue *txq,
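Every helper in the hunk above repeats one locking idiom: with TCQ_F_NOLOCK set, the gso_skb and skb_bad_txq lists are shared between CPUs, so the qdisc lock must be taken explicitly; for classic qdiscs the caller already serializes under the root lock. Condensed into a single illustrative helper (peek_shared_list() is not part of the patch):

static struct sk_buff *peek_shared_list(struct Qdisc *q,
                                        struct sk_buff_head *list)
{
        spinlock_t *lock = NULL;
        struct sk_buff *skb;

        /* Lockless qdiscs run concurrently on several CPUs, so the
         * shared skb lists need the qdisc lock taken here.
         */
        if (q->flags & TCQ_F_NOLOCK) {
                lock = qdisc_lock(q);
                spin_lock(lock);
        }

        skb = skb_peek(list);

        if (lock)
                spin_unlock(lock);

        return skb;
}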
@@ -95,9 +195,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
95 if (!nskb) 195 if (!nskb)
96 break; 196 break;
97 if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { 197 if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
98 q->skb_bad_txq = nskb; 198 qdisc_enqueue_skb_bad_txq(q, nskb);
99 qdisc_qstats_backlog_inc(q, nskb); 199
100 q->q.qlen++; 200 if (qdisc_is_percpu_stats(q)) {
201 qdisc_qstats_cpu_backlog_inc(q, nskb);
202 qdisc_qstats_cpu_qlen_inc(q);
203 } else {
204 qdisc_qstats_backlog_inc(q, nskb);
205 q->q.qlen++;
206 }
101 break; 207 break;
102 } 208 }
103 skb->next = nskb; 209 skb->next = nskb;
@@ -113,40 +219,62 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
113static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, 219static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
114 int *packets) 220 int *packets)
115{ 221{
116 struct sk_buff *skb = q->gso_skb;
117 const struct netdev_queue *txq = q->dev_queue; 222 const struct netdev_queue *txq = q->dev_queue;
223 struct sk_buff *skb = NULL;
118 224
119 *packets = 1; 225 *packets = 1;
120 if (unlikely(skb)) { 226 if (unlikely(!skb_queue_empty(&q->gso_skb))) {
227 spinlock_t *lock = NULL;
228
229 if (q->flags & TCQ_F_NOLOCK) {
230 lock = qdisc_lock(q);
231 spin_lock(lock);
232 }
233
234 skb = skb_peek(&q->gso_skb);
235
236 /* skb may be null if another cpu pulls gso_skb off in between
237 * empty check and lock.
238 */
239 if (!skb) {
240 if (lock)
241 spin_unlock(lock);
242 goto validate;
243 }
244
121 /* skb in gso_skb were already validated */ 245 /* skb in gso_skb were already validated */
122 *validate = false; 246 *validate = false;
247 if (xfrm_offload(skb))
248 *validate = true;
123 /* check the reason of requeuing without tx lock first */ 249 /* check the reason of requeuing without tx lock first */
124 txq = skb_get_tx_queue(txq->dev, skb); 250 txq = skb_get_tx_queue(txq->dev, skb);
125 if (!netif_xmit_frozen_or_stopped(txq)) { 251 if (!netif_xmit_frozen_or_stopped(txq)) {
126 q->gso_skb = NULL; 252 skb = __skb_dequeue(&q->gso_skb);
127 qdisc_qstats_backlog_dec(q, skb); 253 if (qdisc_is_percpu_stats(q)) {
128 q->q.qlen--; 254 qdisc_qstats_cpu_backlog_dec(q, skb);
129 } else 255 qdisc_qstats_cpu_qlen_dec(q);
256 } else {
257 qdisc_qstats_backlog_dec(q, skb);
258 q->q.qlen--;
259 }
260 } else {
130 skb = NULL; 261 skb = NULL;
131 goto trace;
132 }
133 *validate = true;
134 skb = q->skb_bad_txq;
135 if (unlikely(skb)) {
136 /* check the reason of requeuing without tx lock first */
137 txq = skb_get_tx_queue(txq->dev, skb);
138 if (!netif_xmit_frozen_or_stopped(txq)) {
139 q->skb_bad_txq = NULL;
140 qdisc_qstats_backlog_dec(q, skb);
141 q->q.qlen--;
142 goto bulk;
143 } 262 }
144 skb = NULL; 263 if (lock)
264 spin_unlock(lock);
145 goto trace; 265 goto trace;
146 } 266 }
147 if (!(q->flags & TCQ_F_ONETXQUEUE) || 267validate:
148 !netif_xmit_frozen_or_stopped(txq)) 268 *validate = true;
149 skb = q->dequeue(q); 269
270 if ((q->flags & TCQ_F_ONETXQUEUE) &&
271 netif_xmit_frozen_or_stopped(txq))
272 return skb;
273
274 skb = qdisc_dequeue_skb_bad_txq(q);
275 if (unlikely(skb))
276 goto bulk;
277 skb = q->dequeue(q);
150 if (skb) { 278 if (skb) {
151bulk: 279bulk:
152 if (qdisc_may_bulk(q)) 280 if (qdisc_may_bulk(q))
@@ -165,21 +293,33 @@ trace:
165 * only one CPU can execute this function. 293 * only one CPU can execute this function.
166 * 294 *
167 * Returns to the caller: 295 * Returns to the caller:
168 * 0 - queue is empty or throttled. 296 * false - hardware queue frozen backoff
169 * >0 - queue is not empty. 297 * true - feel free to send more pkts
170 */ 298 */
171int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, 299bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
172 struct net_device *dev, struct netdev_queue *txq, 300 struct net_device *dev, struct netdev_queue *txq,
173 spinlock_t *root_lock, bool validate) 301 spinlock_t *root_lock, bool validate)
174{ 302{
175 int ret = NETDEV_TX_BUSY; 303 int ret = NETDEV_TX_BUSY;
304 bool again = false;
176 305
177 /* And release qdisc */ 306 /* And release qdisc */
178 spin_unlock(root_lock); 307 if (root_lock)
308 spin_unlock(root_lock);
179 309
180 /* Note that we validate skb (GSO, checksum, ...) outside of locks */ 310 /* Note that we validate skb (GSO, checksum, ...) outside of locks */
181 if (validate) 311 if (validate)
182 skb = validate_xmit_skb_list(skb, dev); 312 skb = validate_xmit_skb_list(skb, dev, &again);
313
314#ifdef CONFIG_XFRM_OFFLOAD
315 if (unlikely(again)) {
316 if (root_lock)
317 spin_lock(root_lock);
318
319 dev_requeue_skb(skb, q);
320 return false;
321 }
322#endif
183 323
184 if (likely(skb)) { 324 if (likely(skb)) {
185 HARD_TX_LOCK(dev, txq, smp_processor_id()); 325 HARD_TX_LOCK(dev, txq, smp_processor_id());
@@ -188,27 +328,28 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
188 328
189 HARD_TX_UNLOCK(dev, txq); 329 HARD_TX_UNLOCK(dev, txq);
190 } else { 330 } else {
191 spin_lock(root_lock); 331 if (root_lock)
192 return qdisc_qlen(q); 332 spin_lock(root_lock);
333 return true;
193 } 334 }
194 spin_lock(root_lock);
195 335
196 if (dev_xmit_complete(ret)) { 336 if (root_lock)
197 /* Driver sent out skb successfully or skb was consumed */ 337 spin_lock(root_lock);
198 ret = qdisc_qlen(q); 338
199 } else { 339 if (!dev_xmit_complete(ret)) {
200 /* Driver returned NETDEV_TX_BUSY - requeue skb */ 340 /* Driver returned NETDEV_TX_BUSY - requeue skb */
201 if (unlikely(ret != NETDEV_TX_BUSY)) 341 if (unlikely(ret != NETDEV_TX_BUSY))
202 net_warn_ratelimited("BUG %s code %d qlen %d\n", 342 net_warn_ratelimited("BUG %s code %d qlen %d\n",
203 dev->name, ret, q->q.qlen); 343 dev->name, ret, q->q.qlen);
204 344
205 ret = dev_requeue_skb(skb, q); 345 dev_requeue_skb(skb, q);
346 return false;
206 } 347 }
207 348
208 if (ret && netif_xmit_frozen_or_stopped(txq)) 349 if (ret && netif_xmit_frozen_or_stopped(txq))
209 ret = 0; 350 return false;
210 351
211 return ret; 352 return true;
212} 353}
213 354
214/* 355/*
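sch_direct_xmit()'s contract flips here from "remaining queue length" to a boolean "may the caller keep transmitting". The transmit loop in qdisc_restart()/__qdisc_run() then reads roughly as follows (an illustrative condensation of the caller side, reusing this file's static dequeue_skb()):

static void xmit_loop(struct Qdisc *q, struct net_device *dev,
                      struct netdev_queue *txq, spinlock_t *root_lock)
{
        struct sk_buff *skb;
        bool validate;
        int packets;

        while ((skb = dequeue_skb(q, &validate, &packets)) != NULL) {
                /* false: driver busy or txq frozen; the skb has
                 * already been requeued, so stop and wait to be
                 * rescheduled.
                 */
                if (!sch_direct_xmit(skb, q, dev, txq, root_lock,
                                     validate))
                        break;
        }
}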
@@ -230,20 +371,22 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
230 * >0 - queue is not empty. 371 * >0 - queue is not empty.
231 * 372 *
232 */ 373 */
233static inline int qdisc_restart(struct Qdisc *q, int *packets) 374static inline bool qdisc_restart(struct Qdisc *q, int *packets)
234{ 375{
376 spinlock_t *root_lock = NULL;
235 struct netdev_queue *txq; 377 struct netdev_queue *txq;
236 struct net_device *dev; 378 struct net_device *dev;
237 spinlock_t *root_lock;
238 struct sk_buff *skb; 379 struct sk_buff *skb;
239 bool validate; 380 bool validate;
240 381
241 /* Dequeue packet */ 382 /* Dequeue packet */
242 skb = dequeue_skb(q, &validate, packets); 383 skb = dequeue_skb(q, &validate, packets);
243 if (unlikely(!skb)) 384 if (unlikely(!skb))
244 return 0; 385 return false;
386
387 if (!(q->flags & TCQ_F_NOLOCK))
388 root_lock = qdisc_lock(q);
245 389
246 root_lock = qdisc_lock(q);
247 dev = qdisc_dev(q); 390 dev = qdisc_dev(q);
248 txq = skb_get_tx_queue(dev, skb); 391 txq = skb_get_tx_queue(dev, skb);
249 392
@@ -267,8 +410,6 @@ void __qdisc_run(struct Qdisc *q)
267 break; 410 break;
268 } 411 }
269 } 412 }
270
271 qdisc_run_end(q);
272} 413}
273 414
274unsigned long dev_trans_start(struct net_device *dev) 415unsigned long dev_trans_start(struct net_device *dev)
@@ -369,7 +510,7 @@ void netif_carrier_on(struct net_device *dev)
369 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { 510 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
370 if (dev->reg_state == NETREG_UNINITIALIZED) 511 if (dev->reg_state == NETREG_UNINITIALIZED)
371 return; 512 return;
372 atomic_inc(&dev->carrier_changes); 513 atomic_inc(&dev->carrier_up_count);
373 linkwatch_fire_event(dev); 514 linkwatch_fire_event(dev);
374 if (netif_running(dev)) 515 if (netif_running(dev))
375 __netdev_watchdog_up(dev); 516 __netdev_watchdog_up(dev);
@@ -388,7 +529,7 @@ void netif_carrier_off(struct net_device *dev)
388 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { 529 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
389 if (dev->reg_state == NETREG_UNINITIALIZED) 530 if (dev->reg_state == NETREG_UNINITIALIZED)
390 return; 531 return;
391 atomic_inc(&dev->carrier_changes); 532 atomic_inc(&dev->carrier_down_count);
392 linkwatch_fire_event(dev); 533 linkwatch_fire_event(dev);
393 } 534 }
394} 535}
@@ -437,7 +578,8 @@ struct Qdisc noop_qdisc = {
437}; 578};
438EXPORT_SYMBOL(noop_qdisc); 579EXPORT_SYMBOL(noop_qdisc);
439 580
440static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt) 581static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
582 struct netlink_ext_ack *extack)
441{ 583{
442 /* register_qdisc() assigns a default of noop_enqueue if unset, 584 /* register_qdisc() assigns a default of noop_enqueue if unset,
443 * but __dev_queue_xmit() treats noqueue only as such 585 * but __dev_queue_xmit() treats noqueue only as such
@@ -468,93 +610,99 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = {
468 610
469/* 611/*
470 * Private data for a pfifo_fast scheduler containing: 612 * Private data for a pfifo_fast scheduler containing:
471 * - queues for the three band 613 * - rings for priority bands
472 * - bitmap indicating which of the bands contain skbs
473 */ 614 */
474struct pfifo_fast_priv { 615struct pfifo_fast_priv {
475 u32 bitmap; 616 struct skb_array q[PFIFO_FAST_BANDS];
476 struct qdisc_skb_head q[PFIFO_FAST_BANDS];
477}; 617};
478 618
479/* 619static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
480 * Convert a bitmap to the first band number where an skb is queued, where: 620 int band)
481 * bitmap=0 means there are no skbs on any band.
482 * bitmap=1 means there is an skb on band 0.
483 * bitmap=7 means there are skbs on all 3 bands, etc.
484 */
485static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
486
487static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
488 int band)
489{ 621{
490 return priv->q + band; 622 return &priv->q[band];
491} 623}
492 624
493static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, 625static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
494 struct sk_buff **to_free) 626 struct sk_buff **to_free)
495{ 627{
496 if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { 628 int band = prio2band[skb->priority & TC_PRIO_MAX];
497 int band = prio2band[skb->priority & TC_PRIO_MAX]; 629 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
498 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 630 struct skb_array *q = band2list(priv, band);
499 struct qdisc_skb_head *list = band2list(priv, band); 631 int err;
500 632
501 priv->bitmap |= (1 << band); 633 err = skb_array_produce(q, skb);
502 qdisc->q.qlen++;
503 return __qdisc_enqueue_tail(skb, qdisc, list);
504 }
505 634
506 return qdisc_drop(skb, qdisc, to_free); 635 if (unlikely(err))
636 return qdisc_drop_cpu(skb, qdisc, to_free);
637
638 qdisc_qstats_cpu_qlen_inc(qdisc);
639 qdisc_qstats_cpu_backlog_inc(qdisc, skb);
640 return NET_XMIT_SUCCESS;
507} 641}
508 642
509static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) 643static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
510{ 644{
511 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 645 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
512 int band = bitmap2band[priv->bitmap]; 646 struct sk_buff *skb = NULL;
513 647 int band;
514 if (likely(band >= 0)) {
515 struct qdisc_skb_head *qh = band2list(priv, band);
516 struct sk_buff *skb = __qdisc_dequeue_head(qh);
517 648
518 if (likely(skb != NULL)) { 649 for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
519 qdisc_qstats_backlog_dec(qdisc, skb); 650 struct skb_array *q = band2list(priv, band);
520 qdisc_bstats_update(qdisc, skb);
521 }
522 651
523 qdisc->q.qlen--; 652 if (__skb_array_empty(q))
524 if (qh->qlen == 0) 653 continue;
525 priv->bitmap &= ~(1 << band);
526 654
527 return skb; 655 skb = skb_array_consume_bh(q);
656 }
657 if (likely(skb)) {
658 qdisc_qstats_cpu_backlog_dec(qdisc, skb);
659 qdisc_bstats_cpu_update(qdisc, skb);
660 qdisc_qstats_cpu_qlen_dec(qdisc);
528 } 661 }
529 662
530 return NULL; 663 return skb;
531} 664}
532 665
533static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) 666static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
534{ 667{
535 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 668 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
536 int band = bitmap2band[priv->bitmap]; 669 struct sk_buff *skb = NULL;
670 int band;
537 671
538 if (band >= 0) { 672 for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
539 struct qdisc_skb_head *qh = band2list(priv, band); 673 struct skb_array *q = band2list(priv, band);
540 674
541 return qh->head; 675 skb = __skb_array_peek(q);
542 } 676 }
543 677
544 return NULL; 678 return skb;
545} 679}
546 680
547static void pfifo_fast_reset(struct Qdisc *qdisc) 681static void pfifo_fast_reset(struct Qdisc *qdisc)
548{ 682{
549 int prio; 683 int i, band;
550 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 684 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
551 685
552 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) 686 for (band = 0; band < PFIFO_FAST_BANDS; band++) {
553 __qdisc_reset_queue(band2list(priv, prio)); 687 struct skb_array *q = band2list(priv, band);
688 struct sk_buff *skb;
554 689
555 priv->bitmap = 0; 690 /* NULL ring is possible if destroy path is due to a failed
556 qdisc->qstats.backlog = 0; 691 * skb_array_init() in pfifo_fast_init() case.
557 qdisc->q.qlen = 0; 692 */
693 if (!q->ring.queue)
694 continue;
695
696 while ((skb = skb_array_consume_bh(q)) != NULL)
697 kfree_skb(skb);
698 }
699
700 for_each_possible_cpu(i) {
701 struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
702
703 q->backlog = 0;
704 q->qlen = 0;
705 }
558} 706}
559 707
560static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) 708static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
@@ -570,19 +718,68 @@ nla_put_failure:
570 return -1; 718 return -1;
571} 719}
572 720
573static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) 721static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
722 struct netlink_ext_ack *extack)
574{ 723{
575 int prio; 724 unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
576 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 725 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
726 int prio;
727
728 /* guard against zero length rings */
729 if (!qlen)
730 return -EINVAL;
577 731
578 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) 732 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
579 qdisc_skb_head_init(band2list(priv, prio)); 733 struct skb_array *q = band2list(priv, prio);
734 int err;
735
736 err = skb_array_init(q, qlen, GFP_KERNEL);
737 if (err)
738 return -ENOMEM;
739 }
580 740
581 /* Can by-pass the queue discipline */ 741 /* Can by-pass the queue discipline */
582 qdisc->flags |= TCQ_F_CAN_BYPASS; 742 qdisc->flags |= TCQ_F_CAN_BYPASS;
583 return 0; 743 return 0;
584} 744}
585 745
746static void pfifo_fast_destroy(struct Qdisc *sch)
747{
748 struct pfifo_fast_priv *priv = qdisc_priv(sch);
749 int prio;
750
751 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
752 struct skb_array *q = band2list(priv, prio);
753
754 /* NULL ring is possible if destroy path is due to a failed
755 * skb_array_init() in pfifo_fast_init() case.
756 */
757 if (!q->ring.queue)
758 continue;
759 /* Destroy ring but no need to kfree_skb because a call to
760 * pfifo_fast_reset() has already done that work.
761 */
762 ptr_ring_cleanup(&q->ring, NULL);
763 }
764}
765
766static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
767 unsigned int new_len)
768{
769 struct pfifo_fast_priv *priv = qdisc_priv(sch);
770 struct skb_array *bands[PFIFO_FAST_BANDS];
771 int prio;
772
773 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
774 struct skb_array *q = band2list(priv, prio);
775
776 bands[prio] = q;
777 }
778
779 return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
780 GFP_KERNEL);
781}
782
586struct Qdisc_ops pfifo_fast_ops __read_mostly = { 783struct Qdisc_ops pfifo_fast_ops __read_mostly = {
587 .id = "pfifo_fast", 784 .id = "pfifo_fast",
588 .priv_size = sizeof(struct pfifo_fast_priv), 785 .priv_size = sizeof(struct pfifo_fast_priv),
@@ -590,9 +787,12 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
590 .dequeue = pfifo_fast_dequeue, 787 .dequeue = pfifo_fast_dequeue,
591 .peek = pfifo_fast_peek, 788 .peek = pfifo_fast_peek,
592 .init = pfifo_fast_init, 789 .init = pfifo_fast_init,
790 .destroy = pfifo_fast_destroy,
593 .reset = pfifo_fast_reset, 791 .reset = pfifo_fast_reset,
594 .dump = pfifo_fast_dump, 792 .dump = pfifo_fast_dump,
793 .change_tx_queue_len = pfifo_fast_change_tx_queue_len,
595 .owner = THIS_MODULE, 794 .owner = THIS_MODULE,
795 .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
596}; 796};
597EXPORT_SYMBOL(pfifo_fast_ops); 797EXPORT_SYMBOL(pfifo_fast_ops);
598 798
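The new .static_flags line is what actually flips pfifo_fast over: TCQ_F_NOLOCK tells the core to skip the root qdisc lock and TCQ_F_CPUSTATS moves the counters into per-CPU storage, which is why the data path above uses the *_cpu statistics helpers. Those helpers reduce to plain per-CPU increments; a sketch mirroring the definitions in include/net/sch_generic.h:

static void stats_cpu_qlen_inc(struct Qdisc *sch)
{
        /* One increment on the local CPU, no shared cacheline. */
        this_cpu_inc(sch->cpu_qstats->qlen);
}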
@@ -600,7 +800,8 @@ static struct lock_class_key qdisc_tx_busylock;
600static struct lock_class_key qdisc_running_key; 800static struct lock_class_key qdisc_running_key;
601 801
602struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 802struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
603 const struct Qdisc_ops *ops) 803 const struct Qdisc_ops *ops,
804 struct netlink_ext_ack *extack)
604{ 805{
605 void *p; 806 void *p;
606 struct Qdisc *sch; 807 struct Qdisc *sch;
@@ -609,6 +810,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
609 struct net_device *dev; 810 struct net_device *dev;
610 811
611 if (!dev_queue) { 812 if (!dev_queue) {
813 NL_SET_ERR_MSG(extack, "No device queue given");
612 err = -EINVAL; 814 err = -EINVAL;
613 goto errout; 815 goto errout;
614 } 816 }
@@ -630,6 +832,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
630 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); 832 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
631 sch->padded = (char *) sch - (char *) p; 833 sch->padded = (char *) sch - (char *) p;
632 } 834 }
835 __skb_queue_head_init(&sch->gso_skb);
836 __skb_queue_head_init(&sch->skb_bad_txq);
633 qdisc_skb_head_init(&sch->q); 837 qdisc_skb_head_init(&sch->q);
634 spin_lock_init(&sch->q.lock); 838 spin_lock_init(&sch->q.lock);
635 839
@@ -671,21 +875,24 @@ errout:
671 875
672struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, 876struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
673 const struct Qdisc_ops *ops, 877 const struct Qdisc_ops *ops,
674 unsigned int parentid) 878 unsigned int parentid,
879 struct netlink_ext_ack *extack)
675{ 880{
676 struct Qdisc *sch; 881 struct Qdisc *sch;
677 882
678 if (!try_module_get(ops->owner)) 883 if (!try_module_get(ops->owner)) {
884 NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
679 return NULL; 885 return NULL;
886 }
680 887
681 sch = qdisc_alloc(dev_queue, ops); 888 sch = qdisc_alloc(dev_queue, ops, extack);
682 if (IS_ERR(sch)) { 889 if (IS_ERR(sch)) {
683 module_put(ops->owner); 890 module_put(ops->owner);
684 return NULL; 891 return NULL;
685 } 892 }
686 sch->parent = parentid; 893 sch->parent = parentid;
687 894
688 if (!ops->init || ops->init(sch, NULL) == 0) 895 if (!ops->init || ops->init(sch, NULL, extack) == 0)
689 return sch; 896 return sch;
690 897
691 qdisc_destroy(sch); 898 qdisc_destroy(sch);
@@ -698,17 +905,21 @@ EXPORT_SYMBOL(qdisc_create_dflt);
698void qdisc_reset(struct Qdisc *qdisc) 905void qdisc_reset(struct Qdisc *qdisc)
699{ 906{
700 const struct Qdisc_ops *ops = qdisc->ops; 907 const struct Qdisc_ops *ops = qdisc->ops;
908 struct sk_buff *skb, *tmp;
701 909
702 if (ops->reset) 910 if (ops->reset)
703 ops->reset(qdisc); 911 ops->reset(qdisc);
704 912
705 kfree_skb(qdisc->skb_bad_txq); 913 skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
706 qdisc->skb_bad_txq = NULL; 914 __skb_unlink(skb, &qdisc->gso_skb);
915 kfree_skb_list(skb);
916 }
707 917
708 if (qdisc->gso_skb) { 918 skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
709 kfree_skb_list(qdisc->gso_skb); 919 __skb_unlink(skb, &qdisc->skb_bad_txq);
710 qdisc->gso_skb = NULL; 920 kfree_skb_list(skb);
711 } 921 }
922
712 qdisc->q.qlen = 0; 923 qdisc->q.qlen = 0;
713 qdisc->qstats.backlog = 0; 924 qdisc->qstats.backlog = 0;
714} 925}
@@ -727,6 +938,7 @@ void qdisc_free(struct Qdisc *qdisc)
727void qdisc_destroy(struct Qdisc *qdisc) 938void qdisc_destroy(struct Qdisc *qdisc)
728{ 939{
729 const struct Qdisc_ops *ops = qdisc->ops; 940 const struct Qdisc_ops *ops = qdisc->ops;
941 struct sk_buff *skb, *tmp;
730 942
731 if (qdisc->flags & TCQ_F_BUILTIN || 943 if (qdisc->flags & TCQ_F_BUILTIN ||
732 !refcount_dec_and_test(&qdisc->refcnt)) 944 !refcount_dec_and_test(&qdisc->refcnt))
@@ -746,8 +958,16 @@ void qdisc_destroy(struct Qdisc *qdisc)
746 module_put(ops->owner); 958 module_put(ops->owner);
747 dev_put(qdisc_dev(qdisc)); 959 dev_put(qdisc_dev(qdisc));
748 960
749 kfree_skb_list(qdisc->gso_skb); 961 skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
750 kfree_skb(qdisc->skb_bad_txq); 962 __skb_unlink(skb, &qdisc->gso_skb);
963 kfree_skb_list(skb);
964 }
965
966 skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
967 __skb_unlink(skb, &qdisc->skb_bad_txq);
968 kfree_skb_list(skb);
969 }
970
751 qdisc_free(qdisc); 971 qdisc_free(qdisc);
752} 972}
753EXPORT_SYMBOL(qdisc_destroy); 973EXPORT_SYMBOL(qdisc_destroy);
@@ -762,10 +982,6 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
762 root_lock = qdisc_lock(oqdisc); 982 root_lock = qdisc_lock(oqdisc);
763 spin_lock_bh(root_lock); 983 spin_lock_bh(root_lock);
764 984
765 /* Prune old scheduler */
766 if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1)
767 qdisc_reset(oqdisc);
768
769 /* ... and graft new one */ 985 /* ... and graft new one */
770 if (qdisc == NULL) 986 if (qdisc == NULL)
771 qdisc = &noop_qdisc; 987 qdisc = &noop_qdisc;
@@ -788,7 +1004,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
788 if (dev->priv_flags & IFF_NO_QUEUE) 1004 if (dev->priv_flags & IFF_NO_QUEUE)
789 ops = &noqueue_qdisc_ops; 1005 ops = &noqueue_qdisc_ops;
790 1006
791 qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT); 1007 qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
792 if (!qdisc) { 1008 if (!qdisc) {
793 netdev_info(dev, "activation failed\n"); 1009 netdev_info(dev, "activation failed\n");
794 return; 1010 return;
@@ -811,7 +1027,7 @@ static void attach_default_qdiscs(struct net_device *dev)
811 dev->qdisc = txq->qdisc_sleeping; 1027 dev->qdisc = txq->qdisc_sleeping;
812 qdisc_refcount_inc(dev->qdisc); 1028 qdisc_refcount_inc(dev->qdisc);
813 } else { 1029 } else {
814 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); 1030 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
815 if (qdisc) { 1031 if (qdisc) {
816 dev->qdisc = qdisc; 1032 dev->qdisc = qdisc;
817 qdisc->ops->attach(qdisc); 1033 qdisc->ops->attach(qdisc);
@@ -901,14 +1117,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
901 1117
902 dev_queue = netdev_get_tx_queue(dev, i); 1118 dev_queue = netdev_get_tx_queue(dev, i);
903 q = dev_queue->qdisc_sleeping; 1119 q = dev_queue->qdisc_sleeping;
904 root_lock = qdisc_lock(q);
905 1120
906 spin_lock_bh(root_lock); 1121 if (q->flags & TCQ_F_NOLOCK) {
1122 val = test_bit(__QDISC_STATE_SCHED, &q->state);
1123 } else {
1124 root_lock = qdisc_lock(q);
1125 spin_lock_bh(root_lock);
907 1126
908 val = (qdisc_is_running(q) || 1127 val = (qdisc_is_running(q) ||
909 test_bit(__QDISC_STATE_SCHED, &q->state)); 1128 test_bit(__QDISC_STATE_SCHED, &q->state));
910 1129
911 spin_unlock_bh(root_lock); 1130 spin_unlock_bh(root_lock);
1131 }
912 1132
913 if (val) 1133 if (val)
914 return true; 1134 return true;
@@ -916,6 +1136,16 @@ static bool some_qdisc_is_busy(struct net_device *dev)
916 return false; 1136 return false;
917} 1137}
918 1138
1139static void dev_qdisc_reset(struct net_device *dev,
1140 struct netdev_queue *dev_queue,
1141 void *none)
1142{
1143 struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1144
1145 if (qdisc)
1146 qdisc_reset(qdisc);
1147}
1148
919/** 1149/**
920 * dev_deactivate_many - deactivate transmissions on several devices 1150 * dev_deactivate_many - deactivate transmissions on several devices
921 * @head: list of devices to deactivate 1151 * @head: list of devices to deactivate
@@ -926,7 +1156,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
926void dev_deactivate_many(struct list_head *head) 1156void dev_deactivate_many(struct list_head *head)
927{ 1157{
928 struct net_device *dev; 1158 struct net_device *dev;
929 bool sync_needed = false;
930 1159
931 list_for_each_entry(dev, head, close_list) { 1160 list_for_each_entry(dev, head, close_list) {
932 netdev_for_each_tx_queue(dev, dev_deactivate_queue, 1161 netdev_for_each_tx_queue(dev, dev_deactivate_queue,
@@ -936,20 +1165,25 @@ void dev_deactivate_many(struct list_head *head)
936 &noop_qdisc); 1165 &noop_qdisc);
937 1166
938 dev_watchdog_down(dev); 1167 dev_watchdog_down(dev);
939 sync_needed |= !dev->dismantle;
940 } 1168 }
941 1169
942 /* Wait for outstanding qdisc-less dev_queue_xmit calls. 1170 /* Wait for outstanding qdisc-less dev_queue_xmit calls.
943 * This is avoided if all devices are in dismantle phase : 1171 * This is avoided if all devices are in dismantle phase :
944 * Caller will call synchronize_net() for us 1172 * Caller will call synchronize_net() for us
945 */ 1173 */
946 if (sync_needed) 1174 synchronize_net();
947 synchronize_net();
948 1175
949 /* Wait for outstanding qdisc_run calls. */ 1176 /* Wait for outstanding qdisc_run calls. */
950 list_for_each_entry(dev, head, close_list) 1177 list_for_each_entry(dev, head, close_list) {
951 while (some_qdisc_is_busy(dev)) 1178 while (some_qdisc_is_busy(dev))
952 yield(); 1179 yield();
1180 /* The new qdisc is assigned at this point so we can safely
1181 * unwind stale skb lists and qdisc statistics
1182 */
1183 netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
1184 if (dev_ingress_queue(dev))
1185 dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
1186 }
953} 1187}
954 1188
955void dev_deactivate(struct net_device *dev) 1189void dev_deactivate(struct net_device *dev)
@@ -962,6 +1196,39 @@ void dev_deactivate(struct net_device *dev)
962} 1196}
963EXPORT_SYMBOL(dev_deactivate); 1197EXPORT_SYMBOL(dev_deactivate);
964 1198
1199static int qdisc_change_tx_queue_len(struct net_device *dev,
1200 struct netdev_queue *dev_queue)
1201{
1202 struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1203 const struct Qdisc_ops *ops = qdisc->ops;
1204
1205 if (ops->change_tx_queue_len)
1206 return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
1207 return 0;
1208}
1209
1210int dev_qdisc_change_tx_queue_len(struct net_device *dev)
1211{
1212 bool up = dev->flags & IFF_UP;
1213 unsigned int i;
1214 int ret = 0;
1215
1216 if (up)
1217 dev_deactivate(dev);
1218
1219 for (i = 0; i < dev->num_tx_queues; i++) {
1220 ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
1221
1222 /* TODO: revert changes on a partial failure */
1223 if (ret)
1224 break;
1225 }
1226
1227 if (up)
1228 dev_activate(dev);
1229 return ret;
1230}
1231
965static void dev_init_scheduler_queue(struct net_device *dev, 1232static void dev_init_scheduler_queue(struct net_device *dev,
966 struct netdev_queue *dev_queue, 1233 struct netdev_queue *dev_queue,
967 void *_qdisc) 1234 void *_qdisc)
@@ -970,6 +1237,8 @@ static void dev_init_scheduler_queue(struct net_device *dev,
970 1237
971 rcu_assign_pointer(dev_queue->qdisc, qdisc); 1238 rcu_assign_pointer(dev_queue->qdisc, qdisc);
972 dev_queue->qdisc_sleeping = qdisc; 1239 dev_queue->qdisc_sleeping = qdisc;
1240 __skb_queue_head_init(&qdisc->gso_skb);
1241 __skb_queue_head_init(&qdisc->skb_bad_txq);
973} 1242}
974 1243
975void dev_init_scheduler(struct net_device *dev) 1244void dev_init_scheduler(struct net_device *dev)
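dev_qdisc_change_tx_queue_len() exists because ring-based qdiscs size their skb_array at init time from tx_queue_len; changing the length therefore needs a deactivate/resize/activate cycle rather than a plain store. A sketch of the expected caller, assuming dev->tx_queue_len carries the new value and simplifying the error handling:

static int set_tx_queue_len(struct net_device *dev, unsigned int len)
{
        unsigned int old = dev->tx_queue_len;
        int err;

        dev->tx_queue_len = len;
        err = dev_qdisc_change_tx_queue_len(dev);
        if (err)
                dev->tx_queue_len = old;  /* rings may be partially resized */
        return err;
}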
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index bc30f9186ac6..cbe4831f46f4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -306,12 +306,13 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
306 struct tc_gred_sopt *sopt; 306 struct tc_gred_sopt *sopt;
307 int i; 307 int i;
308 308
309 if (dps == NULL) 309 if (!dps)
310 return -EINVAL; 310 return -EINVAL;
311 311
312 sopt = nla_data(dps); 312 sopt = nla_data(dps);
313 313
314 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) 314 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 ||
315 sopt->def_DP >= sopt->DPs)
315 return -EINVAL; 316 return -EINVAL;
316 317
317 sch_tree_lock(sch); 318 sch_tree_lock(sch);
@@ -391,7 +392,8 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
391 [TCA_GRED_LIMIT] = { .type = NLA_U32 }, 392 [TCA_GRED_LIMIT] = { .type = NLA_U32 },
392}; 393};
393 394
394static int gred_change(struct Qdisc *sch, struct nlattr *opt) 395static int gred_change(struct Qdisc *sch, struct nlattr *opt,
396 struct netlink_ext_ack *extack)
395{ 397{
396 struct gred_sched *table = qdisc_priv(sch); 398 struct gred_sched *table = qdisc_priv(sch);
397 struct tc_gred_qopt *ctl; 399 struct tc_gred_qopt *ctl;
@@ -465,12 +467,13 @@ errout:
465 return err; 467 return err;
466} 468}
467 469
468static int gred_init(struct Qdisc *sch, struct nlattr *opt) 470static int gred_init(struct Qdisc *sch, struct nlattr *opt,
471 struct netlink_ext_ack *extack)
469{ 472{
470 struct nlattr *tb[TCA_GRED_MAX + 1]; 473 struct nlattr *tb[TCA_GRED_MAX + 1];
471 int err; 474 int err;
472 475
473 if (opt == NULL) 476 if (!opt)
474 return -EINVAL; 477 return -EINVAL;
475 478
476 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); 479 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
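gred is only half converted at this point: gred_init() and gred_change() gain the extack parameter, but the nla_parse_nested() call above still passes NULL, so attribute-policy errors from this qdisc remain silent. Forwarding the ack would be the obvious follow-up; the one-line sketch below is not part of this patch:

        err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack);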
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d04068a97d81..3ae9877ea205 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -921,7 +921,8 @@ static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
921 921
922static int 922static int
923hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 923hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
924 struct nlattr **tca, unsigned long *arg) 924 struct nlattr **tca, unsigned long *arg,
925 struct netlink_ext_ack *extack)
925{ 926{
926 struct hfsc_sched *q = qdisc_priv(sch); 927 struct hfsc_sched *q = qdisc_priv(sch);
927 struct hfsc_class *cl = (struct hfsc_class *)*arg; 928 struct hfsc_class *cl = (struct hfsc_class *)*arg;
@@ -1033,7 +1034,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1033 if (cl == NULL) 1034 if (cl == NULL)
1034 return -ENOBUFS; 1035 return -ENOBUFS;
1035 1036
1036 err = tcf_block_get(&cl->block, &cl->filter_list, sch); 1037 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1037 if (err) { 1038 if (err) {
1038 kfree(cl); 1039 kfree(cl);
1039 return err; 1040 return err;
@@ -1061,8 +1062,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1061 cl->cl_common.classid = classid; 1062 cl->cl_common.classid = classid;
1062 cl->sched = q; 1063 cl->sched = q;
1063 cl->cl_parent = parent; 1064 cl->cl_parent = parent;
1064 cl->qdisc = qdisc_create_dflt(sch->dev_queue, 1065 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1065 &pfifo_qdisc_ops, classid); 1066 classid, NULL);
1066 if (cl->qdisc == NULL) 1067 if (cl->qdisc == NULL)
1067 cl->qdisc = &noop_qdisc; 1068 cl->qdisc = &noop_qdisc;
1068 else 1069 else
@@ -1176,7 +1177,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1176 1177
1177static int 1178static int
1178hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1179hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1179 struct Qdisc **old) 1180 struct Qdisc **old, struct netlink_ext_ack *extack)
1180{ 1181{
1181 struct hfsc_class *cl = (struct hfsc_class *)arg; 1182 struct hfsc_class *cl = (struct hfsc_class *)arg;
1182 1183
@@ -1184,7 +1185,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1184 return -EINVAL; 1185 return -EINVAL;
1185 if (new == NULL) { 1186 if (new == NULL) {
1186 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1187 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1187 cl->cl_common.classid); 1188 cl->cl_common.classid, NULL);
1188 if (new == NULL) 1189 if (new == NULL)
1189 new = &noop_qdisc; 1190 new = &noop_qdisc;
1190 } 1191 }
@@ -1246,7 +1247,8 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
1246 cl->filter_cnt--; 1247 cl->filter_cnt--;
1247} 1248}
1248 1249
1249static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg) 1250static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg,
1251 struct netlink_ext_ack *extack)
1250{ 1252{
1251 struct hfsc_sched *q = qdisc_priv(sch); 1253 struct hfsc_sched *q = qdisc_priv(sch);
1252 struct hfsc_class *cl = (struct hfsc_class *)arg; 1254 struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1388,7 +1390,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1388} 1390}
1389 1391
1390static int 1392static int
1391hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) 1393hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
1394 struct netlink_ext_ack *extack)
1392{ 1395{
1393 struct hfsc_sched *q = qdisc_priv(sch); 1396 struct hfsc_sched *q = qdisc_priv(sch);
1394 struct tc_hfsc_qopt *qopt; 1397 struct tc_hfsc_qopt *qopt;
@@ -1396,7 +1399,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1396 1399
1397 qdisc_watchdog_init(&q->watchdog, sch); 1400 qdisc_watchdog_init(&q->watchdog, sch);
1398 1401
1399 if (opt == NULL || nla_len(opt) < sizeof(*qopt)) 1402 if (!opt || nla_len(opt) < sizeof(*qopt))
1400 return -EINVAL; 1403 return -EINVAL;
1401 qopt = nla_data(opt); 1404 qopt = nla_data(opt);
1402 1405
@@ -1406,14 +1409,14 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1406 return err; 1409 return err;
1407 q->eligible = RB_ROOT; 1410 q->eligible = RB_ROOT;
1408 1411
1409 err = tcf_block_get(&q->root.block, &q->root.filter_list, sch); 1412 err = tcf_block_get(&q->root.block, &q->root.filter_list, sch, extack);
1410 if (err) 1413 if (err)
1411 return err; 1414 return err;
1412 1415
1413 q->root.cl_common.classid = sch->handle; 1416 q->root.cl_common.classid = sch->handle;
1414 q->root.sched = q; 1417 q->root.sched = q;
1415 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1418 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1416 sch->handle); 1419 sch->handle, NULL);
1417 if (q->root.qdisc == NULL) 1420 if (q->root.qdisc == NULL)
1418 q->root.qdisc = &noop_qdisc; 1421 q->root.qdisc = &noop_qdisc;
1419 else 1422 else
@@ -1429,7 +1432,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1429} 1432}
1430 1433
1431static int 1434static int
1432hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt) 1435hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
1436 struct netlink_ext_ack *extack)
1433{ 1437{
1434 struct hfsc_sched *q = qdisc_priv(sch); 1438 struct hfsc_sched *q = qdisc_priv(sch);
1435 struct tc_hfsc_qopt *qopt; 1439 struct tc_hfsc_qopt *qopt;
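As the hfsc hunks show, qdisc_create_dflt() also gains the extack parameter. Call sites that build an internal pfifo fallback pass NULL, since a failure there is absorbed by falling back to noop_qdisc anyway. The updated prototype as implied by these call sites (the declaring header is not part of this diff, so treat the exact types as an assumption):

    struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
                                    const struct Qdisc_ops *ops,
                                    unsigned int parentid,
                                    struct netlink_ext_ack *extack);

The series is not fully uniform about forwarding, though: htb_graft() below passes its extack to the fallback creation, while hfsc_graft() here passes NULL.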
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 73a53c08091b..bce2632212d3 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -504,7 +504,8 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
504 [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 }, 504 [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 },
505}; 505};
506 506
507static int hhf_change(struct Qdisc *sch, struct nlattr *opt) 507static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
508 struct netlink_ext_ack *extack)
508{ 509{
509 struct hhf_sched_data *q = qdisc_priv(sch); 510 struct hhf_sched_data *q = qdisc_priv(sch);
510 struct nlattr *tb[TCA_HHF_MAX + 1]; 511 struct nlattr *tb[TCA_HHF_MAX + 1];
@@ -571,7 +572,8 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
571 return 0; 572 return 0;
572} 573}
573 574
574static int hhf_init(struct Qdisc *sch, struct nlattr *opt) 575static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
576 struct netlink_ext_ack *extack)
575{ 577{
576 struct hhf_sched_data *q = qdisc_priv(sch); 578 struct hhf_sched_data *q = qdisc_priv(sch);
577 int i; 579 int i;
@@ -589,7 +591,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
589 q->hhf_non_hh_weight = 2; 591 q->hhf_non_hh_weight = 2;
590 592
591 if (opt) { 593 if (opt) {
592 int err = hhf_change(sch, opt); 594 int err = hhf_change(sch, opt, extack);
593 595
594 if (err) 596 if (err)
595 return err; 597 return err;
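hhf shows the second recurring shape: init() does the one-time setup and delegates option parsing to change(), forwarding the extack it was given; pie, netem, red, sfb and tbf below follow suit. Sketched with illustrative foo_* names:

    static int foo_init(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
    {
            foo_set_defaults(qdisc_priv(sch));      /* hypothetical helper */

            /* reuse the change() parser so errors carry the same extack */
            return opt ? foo_change(sch, opt, extack) : 0;
    }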
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fa0380730ff0..1ea9846cc6ce 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1017,7 +1017,8 @@ static void htb_work_func(struct work_struct *work)
1017 rcu_read_unlock(); 1017 rcu_read_unlock();
1018} 1018}
1019 1019
1020static int htb_init(struct Qdisc *sch, struct nlattr *opt) 1020static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1021 struct netlink_ext_ack *extack)
1021{ 1022{
1022 struct htb_sched *q = qdisc_priv(sch); 1023 struct htb_sched *q = qdisc_priv(sch);
1023 struct nlattr *tb[TCA_HTB_MAX + 1]; 1024 struct nlattr *tb[TCA_HTB_MAX + 1];
@@ -1031,7 +1032,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1031 if (!opt) 1032 if (!opt)
1032 return -EINVAL; 1033 return -EINVAL;
1033 1034
1034 err = tcf_block_get(&q->block, &q->filter_list, sch); 1035 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1035 if (err) 1036 if (err)
1036 return err; 1037 return err;
1037 1038
@@ -1171,7 +1172,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1171} 1172}
1172 1173
1173static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1174static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1174 struct Qdisc **old) 1175 struct Qdisc **old, struct netlink_ext_ack *extack)
1175{ 1176{
1176 struct htb_class *cl = (struct htb_class *)arg; 1177 struct htb_class *cl = (struct htb_class *)arg;
1177 1178
@@ -1179,7 +1180,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1179 return -EINVAL; 1180 return -EINVAL;
1180 if (new == NULL && 1181 if (new == NULL &&
1181 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1182 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1182 cl->common.classid)) == NULL) 1183 cl->common.classid, extack)) == NULL)
1183 return -ENOBUFS; 1184 return -ENOBUFS;
1184 1185
1185 *old = qdisc_replace(sch, new, &cl->un.leaf.q); 1186 *old = qdisc_replace(sch, new, &cl->un.leaf.q);
@@ -1289,7 +1290,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1289 1290
1290 if (!cl->level && htb_parent_last_child(cl)) { 1291 if (!cl->level && htb_parent_last_child(cl)) {
1291 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1292 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1292 cl->parent->common.classid); 1293 cl->parent->common.classid,
1294 NULL);
1293 last_child = 1; 1295 last_child = 1;
1294 } 1296 }
1295 1297
@@ -1326,7 +1328,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1326 1328
1327static int htb_change_class(struct Qdisc *sch, u32 classid, 1329static int htb_change_class(struct Qdisc *sch, u32 classid,
1328 u32 parentid, struct nlattr **tca, 1330 u32 parentid, struct nlattr **tca,
1329 unsigned long *arg) 1331 unsigned long *arg, struct netlink_ext_ack *extack)
1330{ 1332{
1331 int err = -EINVAL; 1333 int err = -EINVAL;
1332 struct htb_sched *q = qdisc_priv(sch); 1334 struct htb_sched *q = qdisc_priv(sch);
@@ -1356,10 +1358,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1356 1358
1357 /* Keeping backward compatible with rate_table based iproute2 tc */ 1359 /* Keeping backward compatible with rate_table based iproute2 tc */
1358 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) 1360 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1359 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB])); 1361 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1362 NULL));
1360 1363
1361 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) 1364 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1362 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB])); 1365 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1366 NULL));
1363 1367
1364 if (!cl) { /* new class */ 1368 if (!cl) { /* new class */
1365 struct Qdisc *new_q; 1369 struct Qdisc *new_q;
@@ -1394,7 +1398,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1394 if (!cl) 1398 if (!cl)
1395 goto failure; 1399 goto failure;
1396 1400
1397 err = tcf_block_get(&cl->block, &cl->filter_list, sch); 1401 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1398 if (err) { 1402 if (err) {
1399 kfree(cl); 1403 kfree(cl);
1400 goto failure; 1404 goto failure;
@@ -1423,8 +1427,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1423 * so that can't be used inside of sch_tree_lock 1427 * so that can't be used inside of sch_tree_lock
1424 * -- thanks to Karlis Peisenieks 1428 * -- thanks to Karlis Peisenieks
1425 */ 1429 */
1426 new_q = qdisc_create_dflt(sch->dev_queue, 1430 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1427 &pfifo_qdisc_ops, classid); 1431 classid, NULL);
1428 sch_tree_lock(sch); 1432 sch_tree_lock(sch);
1429 if (parent && !parent->level) { 1433 if (parent && !parent->level) {
1430 unsigned int qlen = parent->un.leaf.q->q.qlen; 1434 unsigned int qlen = parent->un.leaf.q->q.qlen;
@@ -1524,7 +1528,8 @@ failure:
1524 return err; 1528 return err;
1525} 1529}
1526 1530
1527static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg) 1531static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
1532 struct netlink_ext_ack *extack)
1528{ 1533{
1529 struct htb_sched *q = qdisc_priv(sch); 1534 struct htb_sched *q = qdisc_priv(sch);
1530 struct htb_class *cl = (struct htb_class *)arg; 1535 struct htb_class *cl = (struct htb_class *)arg;
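The htb hunks show qdisc_get_rtab() growing an extack parameter as well. htb passes NULL in its two linklayer-compat probes because the table is released again immediately via qdisc_put_rtab(); there is no error worth attributing. The new prototype, inferred from the call sites:

    struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
                                            struct nlattr *tab,
                                            struct netlink_ext_ack *extack);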
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 003e1b063447..ce3f55259d0d 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -48,7 +48,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
48{ 48{
49} 49}
50 50
51static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) 51static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl,
52 struct netlink_ext_ack *extack)
52{ 53{
53 struct ingress_sched_data *q = qdisc_priv(sch); 54 struct ingress_sched_data *q = qdisc_priv(sch);
54 55
@@ -60,9 +61,24 @@ static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
60 struct mini_Qdisc_pair *miniqp = priv; 61 struct mini_Qdisc_pair *miniqp = priv;
61 62
62 mini_qdisc_pair_swap(miniqp, tp_head); 63 mini_qdisc_pair_swap(miniqp, tp_head);
64};
65
66static void ingress_ingress_block_set(struct Qdisc *sch, u32 block_index)
67{
68 struct ingress_sched_data *q = qdisc_priv(sch);
69
70 q->block_info.block_index = block_index;
63} 71}
64 72
65static int ingress_init(struct Qdisc *sch, struct nlattr *opt) 73static u32 ingress_ingress_block_get(struct Qdisc *sch)
74{
75 struct ingress_sched_data *q = qdisc_priv(sch);
76
77 return q->block_info.block_index;
78}
79
80static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
81 struct netlink_ext_ack *extack)
66{ 82{
67 struct ingress_sched_data *q = qdisc_priv(sch); 83 struct ingress_sched_data *q = qdisc_priv(sch);
68 struct net_device *dev = qdisc_dev(sch); 84 struct net_device *dev = qdisc_dev(sch);
@@ -75,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
75 q->block_info.chain_head_change = clsact_chain_head_change; 91 q->block_info.chain_head_change = clsact_chain_head_change;
76 q->block_info.chain_head_change_priv = &q->miniqp; 92 q->block_info.chain_head_change_priv = &q->miniqp;
77 93
78 return tcf_block_get_ext(&q->block, sch, &q->block_info); 94 return tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
79} 95}
80 96
81static void ingress_destroy(struct Qdisc *sch) 97static void ingress_destroy(struct Qdisc *sch)
@@ -111,14 +127,16 @@ static const struct Qdisc_class_ops ingress_class_ops = {
111}; 127};
112 128
113static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { 129static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
114 .cl_ops = &ingress_class_ops, 130 .cl_ops = &ingress_class_ops,
115 .id = "ingress", 131 .id = "ingress",
116 .priv_size = sizeof(struct ingress_sched_data), 132 .priv_size = sizeof(struct ingress_sched_data),
117 .static_flags = TCQ_F_CPUSTATS, 133 .static_flags = TCQ_F_CPUSTATS,
118 .init = ingress_init, 134 .init = ingress_init,
119 .destroy = ingress_destroy, 135 .destroy = ingress_destroy,
120 .dump = ingress_dump, 136 .dump = ingress_dump,
121 .owner = THIS_MODULE, 137 .ingress_block_set = ingress_ingress_block_set,
138 .ingress_block_get = ingress_ingress_block_get,
139 .owner = THIS_MODULE,
122}; 140};
123 141
124struct clsact_sched_data { 142struct clsact_sched_data {
@@ -147,7 +165,8 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch,
147 return clsact_find(sch, classid); 165 return clsact_find(sch, classid);
148} 166}
149 167
150static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) 168static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl,
169 struct netlink_ext_ack *extack)
151{ 170{
152 struct clsact_sched_data *q = qdisc_priv(sch); 171 struct clsact_sched_data *q = qdisc_priv(sch);
153 172
@@ -161,7 +180,36 @@ static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
161 } 180 }
162} 181}
163 182
164static int clsact_init(struct Qdisc *sch, struct nlattr *opt) 183static void clsact_ingress_block_set(struct Qdisc *sch, u32 block_index)
184{
185 struct clsact_sched_data *q = qdisc_priv(sch);
186
187 q->ingress_block_info.block_index = block_index;
188}
189
190static void clsact_egress_block_set(struct Qdisc *sch, u32 block_index)
191{
192 struct clsact_sched_data *q = qdisc_priv(sch);
193
194 q->egress_block_info.block_index = block_index;
195}
196
197static u32 clsact_ingress_block_get(struct Qdisc *sch)
198{
199 struct clsact_sched_data *q = qdisc_priv(sch);
200
201 return q->ingress_block_info.block_index;
202}
203
204static u32 clsact_egress_block_get(struct Qdisc *sch)
205{
206 struct clsact_sched_data *q = qdisc_priv(sch);
207
208 return q->egress_block_info.block_index;
209}
210
211static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
212 struct netlink_ext_ack *extack)
165{ 213{
166 struct clsact_sched_data *q = qdisc_priv(sch); 214 struct clsact_sched_data *q = qdisc_priv(sch);
167 struct net_device *dev = qdisc_dev(sch); 215 struct net_device *dev = qdisc_dev(sch);
@@ -176,7 +224,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
176 q->ingress_block_info.chain_head_change = clsact_chain_head_change; 224 q->ingress_block_info.chain_head_change = clsact_chain_head_change;
177 q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; 225 q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
178 226
179 err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); 227 err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info,
228 extack);
180 if (err) 229 if (err)
181 return err; 230 return err;
182 231
@@ -186,7 +235,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
186 q->egress_block_info.chain_head_change = clsact_chain_head_change; 235 q->egress_block_info.chain_head_change = clsact_chain_head_change;
187 q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; 236 q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
188 237
189 return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); 238 return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info, extack);
190} 239}
191 240
192static void clsact_destroy(struct Qdisc *sch) 241static void clsact_destroy(struct Qdisc *sch)
@@ -210,14 +259,18 @@ static const struct Qdisc_class_ops clsact_class_ops = {
210}; 259};
211 260
212static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { 261static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
213 .cl_ops = &clsact_class_ops, 262 .cl_ops = &clsact_class_ops,
214 .id = "clsact", 263 .id = "clsact",
215 .priv_size = sizeof(struct clsact_sched_data), 264 .priv_size = sizeof(struct clsact_sched_data),
216 .static_flags = TCQ_F_CPUSTATS, 265 .static_flags = TCQ_F_CPUSTATS,
217 .init = clsact_init, 266 .init = clsact_init,
218 .destroy = clsact_destroy, 267 .destroy = clsact_destroy,
219 .dump = ingress_dump, 268 .dump = ingress_dump,
220 .owner = THIS_MODULE, 269 .ingress_block_set = clsact_ingress_block_set,
270 .egress_block_set = clsact_egress_block_set,
271 .ingress_block_get = clsact_ingress_block_get,
272 .egress_block_get = clsact_egress_block_get,
273 .owner = THIS_MODULE,
221}; 274};
222 275
223static int __init ingress_module_init(void) 276static int __init ingress_module_init(void)
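The ingress and clsact changes go beyond extack plumbing: the new ingress_block_set/get and egress_block_set/get hooks in Qdisc_ops let userspace bind a qdisc to a numbered, shareable tcf block. The qdisc merely records the requested index in its tcf_block_ext_info before tcf_block_get_ext() runs; the block core handles the actual sharing. The shape of the hooks (foo_* names are illustrative):

    static void foo_ingress_block_set(struct Qdisc *sch, u32 block_index)
    {
            struct foo_sched_data *q = qdisc_priv(sch);  /* hypothetical priv */

            /* stash the index; tcf_block_get_ext() picks it up later */
            q->block_info.block_index = block_index;
    }

    static u32 foo_ingress_block_get(struct Qdisc *sch)
    {
            struct foo_sched_data *q = qdisc_priv(sch);

            return q->block_info.block_index;
    }

With matching iproute2 support this enables something like 'tc qdisc add dev eth0 ingress_block 22 ingress', letting several devices share one filter block (the command syntax is an assumption about the companion userspace patches).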
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 213b586a06a0..f062a18e9162 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -17,6 +17,7 @@
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <net/netlink.h> 18#include <net/netlink.h>
19#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
20#include <net/sch_generic.h>
20 21
21struct mq_sched { 22struct mq_sched {
22 struct Qdisc **qdiscs; 23 struct Qdisc **qdiscs;
@@ -35,7 +36,8 @@ static void mq_destroy(struct Qdisc *sch)
35 kfree(priv->qdiscs); 36 kfree(priv->qdiscs);
36} 37}
37 38
38static int mq_init(struct Qdisc *sch, struct nlattr *opt) 39static int mq_init(struct Qdisc *sch, struct nlattr *opt,
40 struct netlink_ext_ack *extack)
39{ 41{
40 struct net_device *dev = qdisc_dev(sch); 42 struct net_device *dev = qdisc_dev(sch);
41 struct mq_sched *priv = qdisc_priv(sch); 43 struct mq_sched *priv = qdisc_priv(sch);
@@ -59,7 +61,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
59 dev_queue = netdev_get_tx_queue(dev, ntx); 61 dev_queue = netdev_get_tx_queue(dev, ntx);
60 qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), 62 qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
61 TC_H_MAKE(TC_H_MAJ(sch->handle), 63 TC_H_MAKE(TC_H_MAJ(sch->handle),
62 TC_H_MIN(ntx + 1))); 64 TC_H_MIN(ntx + 1)),
65 extack);
63 if (!qdisc) 66 if (!qdisc)
64 return -ENOMEM; 67 return -ENOMEM;
65 priv->qdiscs[ntx] = qdisc; 68 priv->qdiscs[ntx] = qdisc;
@@ -97,23 +100,42 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
97 struct net_device *dev = qdisc_dev(sch); 100 struct net_device *dev = qdisc_dev(sch);
98 struct Qdisc *qdisc; 101 struct Qdisc *qdisc;
99 unsigned int ntx; 102 unsigned int ntx;
103 __u32 qlen = 0;
100 104
101 sch->q.qlen = 0; 105 sch->q.qlen = 0;
102 memset(&sch->bstats, 0, sizeof(sch->bstats)); 106 memset(&sch->bstats, 0, sizeof(sch->bstats));
103 memset(&sch->qstats, 0, sizeof(sch->qstats)); 107 memset(&sch->qstats, 0, sizeof(sch->qstats));
104 108
109 /* MQ supports lockless qdiscs. However, statistics accounting needs
110 * to account for all, none, or a mix of locked and unlocked child
111 * qdiscs. Percpu stats are added to counters in-band and locking
112 * qdisc totals are added at end.
113 */
105 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 114 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
106 qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; 115 qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
107 spin_lock_bh(qdisc_lock(qdisc)); 116 spin_lock_bh(qdisc_lock(qdisc));
108 sch->q.qlen += qdisc->q.qlen; 117
109 sch->bstats.bytes += qdisc->bstats.bytes; 118 if (qdisc_is_percpu_stats(qdisc)) {
110 sch->bstats.packets += qdisc->bstats.packets; 119 qlen = qdisc_qlen_sum(qdisc);
111 sch->qstats.backlog += qdisc->qstats.backlog; 120 __gnet_stats_copy_basic(NULL, &sch->bstats,
112 sch->qstats.drops += qdisc->qstats.drops; 121 qdisc->cpu_bstats,
113 sch->qstats.requeues += qdisc->qstats.requeues; 122 &qdisc->bstats);
114 sch->qstats.overlimits += qdisc->qstats.overlimits; 123 __gnet_stats_copy_queue(&sch->qstats,
124 qdisc->cpu_qstats,
125 &qdisc->qstats, qlen);
126 } else {
127 sch->q.qlen += qdisc->q.qlen;
128 sch->bstats.bytes += qdisc->bstats.bytes;
129 sch->bstats.packets += qdisc->bstats.packets;
130 sch->qstats.backlog += qdisc->qstats.backlog;
131 sch->qstats.drops += qdisc->qstats.drops;
132 sch->qstats.requeues += qdisc->qstats.requeues;
133 sch->qstats.overlimits += qdisc->qstats.overlimits;
134 }
135
115 spin_unlock_bh(qdisc_lock(qdisc)); 136 spin_unlock_bh(qdisc_lock(qdisc));
116 } 137 }
138
117 return 0; 139 return 0;
118} 140}
119 141
@@ -134,7 +156,7 @@ static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
134} 156}
135 157
136static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, 158static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
137 struct Qdisc **old) 159 struct Qdisc **old, struct netlink_ext_ack *extack)
138{ 160{
139 struct netdev_queue *dev_queue = mq_queue_get(sch, cl); 161 struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
140 struct net_device *dev = qdisc_dev(sch); 162 struct net_device *dev = qdisc_dev(sch);
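The mq_dump() rework deals with lockless (TCQ_F_NOLOCK) children: those keep per-cpu byte/packet/queue counters, so the parent can no longer simply add the plain fields. Condensed from the hunk (reading the NULL first argument to __gnet_stats_copy_basic() as the 'running' seqcount, unneeded here because qdisc_lock is held; that interpretation is not stated in the patch):

    if (qdisc_is_percpu_stats(qdisc)) {
            /* lockless child: fold per-cpu counters into the totals */
            qlen = qdisc_qlen_sum(qdisc);
            __gnet_stats_copy_basic(NULL, &sch->bstats,
                                    qdisc->cpu_bstats, &qdisc->bstats);
            __gnet_stats_copy_queue(&sch->qstats, qdisc->cpu_qstats,
                                    &qdisc->qstats, qlen);
    } else {
            /* locked child: plain counters, summed as before */
            sch->q.qlen += qdisc->q.qlen;
            sch->bstats.bytes += qdisc->bstats.bytes;
            sch->bstats.packets += qdisc->bstats.packets;
    }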
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b85885a9d8a1..0e9d761cdd80 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
132 return 0; 132 return 0;
133} 133}
134 134
135static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) 135static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
136 struct netlink_ext_ack *extack)
136{ 137{
137 struct net_device *dev = qdisc_dev(sch); 138 struct net_device *dev = qdisc_dev(sch);
138 struct mqprio_sched *priv = qdisc_priv(sch); 139 struct mqprio_sched *priv = qdisc_priv(sch);
@@ -229,7 +230,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
229 qdisc = qdisc_create_dflt(dev_queue, 230 qdisc = qdisc_create_dflt(dev_queue,
230 get_default_qdisc_ops(dev, i), 231 get_default_qdisc_ops(dev, i),
231 TC_H_MAKE(TC_H_MAJ(sch->handle), 232 TC_H_MAKE(TC_H_MAJ(sch->handle),
232 TC_H_MIN(i + 1))); 233 TC_H_MIN(i + 1)), extack);
233 if (!qdisc) 234 if (!qdisc)
234 return -ENOMEM; 235 return -ENOMEM;
235 236
@@ -319,7 +320,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
319} 320}
320 321
321static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, 322static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
322 struct Qdisc **old) 323 struct Qdisc **old, struct netlink_ext_ack *extack)
323{ 324{
324 struct net_device *dev = qdisc_dev(sch); 325 struct net_device *dev = qdisc_dev(sch);
325 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); 326 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
@@ -388,22 +389,40 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
388 struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); 389 struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
389 struct tc_mqprio_qopt opt = { 0 }; 390 struct tc_mqprio_qopt opt = { 0 };
390 struct Qdisc *qdisc; 391 struct Qdisc *qdisc;
391 unsigned int i; 392 unsigned int ntx, tc;
392 393
393 sch->q.qlen = 0; 394 sch->q.qlen = 0;
394 memset(&sch->bstats, 0, sizeof(sch->bstats)); 395 memset(&sch->bstats, 0, sizeof(sch->bstats));
395 memset(&sch->qstats, 0, sizeof(sch->qstats)); 396 memset(&sch->qstats, 0, sizeof(sch->qstats));
396 397
397 for (i = 0; i < dev->num_tx_queues; i++) { 398 /* MQ supports lockless qdiscs. However, statistics accounting needs
398 qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); 399 * to account for all, none, or a mix of locked and unlocked child
400 * qdiscs. Percpu stats are added to counters in-band and locking
401 * qdisc totals are added at end.
402 */
403 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
404 qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
399 spin_lock_bh(qdisc_lock(qdisc)); 405 spin_lock_bh(qdisc_lock(qdisc));
400 sch->q.qlen += qdisc->q.qlen; 406
401 sch->bstats.bytes += qdisc->bstats.bytes; 407 if (qdisc_is_percpu_stats(qdisc)) {
402 sch->bstats.packets += qdisc->bstats.packets; 408 __u32 qlen = qdisc_qlen_sum(qdisc);
403 sch->qstats.backlog += qdisc->qstats.backlog; 409
404 sch->qstats.drops += qdisc->qstats.drops; 410 __gnet_stats_copy_basic(NULL, &sch->bstats,
405 sch->qstats.requeues += qdisc->qstats.requeues; 411 qdisc->cpu_bstats,
406 sch->qstats.overlimits += qdisc->qstats.overlimits; 412 &qdisc->bstats);
413 __gnet_stats_copy_queue(&sch->qstats,
414 qdisc->cpu_qstats,
415 &qdisc->qstats, qlen);
416 } else {
417 sch->q.qlen += qdisc->q.qlen;
418 sch->bstats.bytes += qdisc->bstats.bytes;
419 sch->bstats.packets += qdisc->bstats.packets;
420 sch->qstats.backlog += qdisc->qstats.backlog;
421 sch->qstats.drops += qdisc->qstats.drops;
422 sch->qstats.requeues += qdisc->qstats.requeues;
423 sch->qstats.overlimits += qdisc->qstats.overlimits;
424 }
425
407 spin_unlock_bh(qdisc_lock(qdisc)); 426 spin_unlock_bh(qdisc_lock(qdisc));
408 } 427 }
409 428
@@ -411,9 +430,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
411 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); 430 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
412 opt.hw = priv->hw_offload; 431 opt.hw = priv->hw_offload;
413 432
414 for (i = 0; i < netdev_get_num_tc(dev); i++) { 433 for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
415 opt.count[i] = dev->tc_to_txq[i].count; 434 opt.count[tc] = dev->tc_to_txq[tc].count;
416 opt.offset[i] = dev->tc_to_txq[i].offset; 435 opt.offset[tc] = dev->tc_to_txq[tc].offset;
417 } 436 }
418 437
419 if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt)) 438 if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
@@ -495,7 +514,6 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
495 if (cl >= TC_H_MIN_PRIORITY) { 514 if (cl >= TC_H_MIN_PRIORITY) {
496 int i; 515 int i;
497 __u32 qlen = 0; 516 __u32 qlen = 0;
498 struct Qdisc *qdisc;
499 struct gnet_stats_queue qstats = {0}; 517 struct gnet_stats_queue qstats = {0};
500 struct gnet_stats_basic_packed bstats = {0}; 518 struct gnet_stats_basic_packed bstats = {0};
501 struct net_device *dev = qdisc_dev(sch); 519 struct net_device *dev = qdisc_dev(sch);
@@ -511,18 +529,26 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
511 529
512 for (i = tc.offset; i < tc.offset + tc.count; i++) { 530 for (i = tc.offset; i < tc.offset + tc.count; i++) {
513 struct netdev_queue *q = netdev_get_tx_queue(dev, i); 531 struct netdev_queue *q = netdev_get_tx_queue(dev, i);
532 struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
533 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
534 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
514 535
515 qdisc = rtnl_dereference(q->qdisc);
516 spin_lock_bh(qdisc_lock(qdisc)); 536 spin_lock_bh(qdisc_lock(qdisc));
517 qlen += qdisc->q.qlen; 537 if (qdisc_is_percpu_stats(qdisc)) {
518 bstats.bytes += qdisc->bstats.bytes; 538 cpu_bstats = qdisc->cpu_bstats;
519 bstats.packets += qdisc->bstats.packets; 539 cpu_qstats = qdisc->cpu_qstats;
520 qstats.backlog += qdisc->qstats.backlog; 540 }
521 qstats.drops += qdisc->qstats.drops; 541
522 qstats.requeues += qdisc->qstats.requeues; 542 qlen = qdisc_qlen_sum(qdisc);
523 qstats.overlimits += qdisc->qstats.overlimits; 543 __gnet_stats_copy_basic(NULL, &sch->bstats,
544 cpu_bstats, &qdisc->bstats);
545 __gnet_stats_copy_queue(&sch->qstats,
546 cpu_qstats,
547 &qdisc->qstats,
548 qlen);
524 spin_unlock_bh(qdisc_lock(qdisc)); 549 spin_unlock_bh(qdisc_lock(qdisc));
525 } 550 }
551
526 /* Reclaim root sleeping lock before completing stats */ 552 /* Reclaim root sleeping lock before completing stats */
527 if (d->lock) 553 if (d->lock)
528 spin_lock_bh(d->lock); 554 spin_lock_bh(d->lock);
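mqprio receives the same per-cpu-aware accounting as mq, plus a loop-variable split (ntx for tx queues, tc for traffic classes) that makes the dump easier to follow. The class-to-queue mapping it reports comes straight from the device state:

    for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
            opt.count[tc]  = dev->tc_to_txq[tc].count;   /* queues in class */
            opt.offset[tc] = dev->tc_to_txq[tc].offset;  /* first queue index */
    }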
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 012216386c0b..1da7ea8de0ad 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -180,7 +180,8 @@ multiq_destroy(struct Qdisc *sch)
180 kfree(q->queues); 180 kfree(q->queues);
181} 181}
182 182
183static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) 183static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
184 struct netlink_ext_ack *extack)
184{ 185{
185 struct multiq_sched_data *q = qdisc_priv(sch); 186 struct multiq_sched_data *q = qdisc_priv(sch);
186 struct tc_multiq_qopt *qopt; 187 struct tc_multiq_qopt *qopt;
@@ -215,7 +216,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
215 child = qdisc_create_dflt(sch->dev_queue, 216 child = qdisc_create_dflt(sch->dev_queue,
216 &pfifo_qdisc_ops, 217 &pfifo_qdisc_ops,
217 TC_H_MAKE(sch->handle, 218 TC_H_MAKE(sch->handle,
218 i + 1)); 219 i + 1), extack);
219 if (child) { 220 if (child) {
220 sch_tree_lock(sch); 221 sch_tree_lock(sch);
221 old = q->queues[i]; 222 old = q->queues[i];
@@ -236,17 +237,18 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
236 return 0; 237 return 0;
237} 238}
238 239
239static int multiq_init(struct Qdisc *sch, struct nlattr *opt) 240static int multiq_init(struct Qdisc *sch, struct nlattr *opt,
241 struct netlink_ext_ack *extack)
240{ 242{
241 struct multiq_sched_data *q = qdisc_priv(sch); 243 struct multiq_sched_data *q = qdisc_priv(sch);
242 int i, err; 244 int i, err;
243 245
244 q->queues = NULL; 246 q->queues = NULL;
245 247
246 if (opt == NULL) 248 if (!opt)
247 return -EINVAL; 249 return -EINVAL;
248 250
249 err = tcf_block_get(&q->block, &q->filter_list, sch); 251 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
250 if (err) 252 if (err)
251 return err; 253 return err;
252 254
@@ -258,7 +260,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
258 for (i = 0; i < q->max_bands; i++) 260 for (i = 0; i < q->max_bands; i++)
259 q->queues[i] = &noop_qdisc; 261 q->queues[i] = &noop_qdisc;
260 262
261 return multiq_tune(sch, opt); 263 return multiq_tune(sch, opt, extack);
262} 264}
263 265
264static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) 266static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -281,7 +283,7 @@ nla_put_failure:
281} 283}
282 284
283static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 285static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
284 struct Qdisc **old) 286 struct Qdisc **old, struct netlink_ext_ack *extack)
285{ 287{
286 struct multiq_sched_data *q = qdisc_priv(sch); 288 struct multiq_sched_data *q = qdisc_priv(sch);
287 unsigned long band = arg - 1; 289 unsigned long band = arg - 1;
@@ -369,7 +371,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
369 } 371 }
370} 372}
371 373
372static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl) 374static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl,
375 struct netlink_ext_ack *extack)
373{ 376{
374 struct multiq_sched_data *q = qdisc_priv(sch); 377 struct multiq_sched_data *q = qdisc_priv(sch);
375 378
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index dd70924cbcdf..7c179addebcd 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -327,7 +327,7 @@ static s64 tabledist(s64 mu, s32 sigma,
327 327
328 /* default uniform distribution */ 328 /* default uniform distribution */
329 if (dist == NULL) 329 if (dist == NULL)
330 return (rnd % (2 * sigma)) - sigma + mu; 330 return ((rnd % (2 * sigma)) + mu) - sigma;
331 331
332 t = dist->table[rnd % dist->size]; 332 t = dist->table[rnd % dist->size];
333 x = (sigma % NETEM_DIST_SCALE) * t; 333 x = (sigma % NETEM_DIST_SCALE) * t;
@@ -893,7 +893,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
893} 893}
894 894
895/* Parse netlink message to set options */ 895/* Parse netlink message to set options */
896static int netem_change(struct Qdisc *sch, struct nlattr *opt) 896static int netem_change(struct Qdisc *sch, struct nlattr *opt,
897 struct netlink_ext_ack *extack)
897{ 898{
898 struct netem_sched_data *q = qdisc_priv(sch); 899 struct netem_sched_data *q = qdisc_priv(sch);
899 struct nlattr *tb[TCA_NETEM_MAX + 1]; 900 struct nlattr *tb[TCA_NETEM_MAX + 1];
@@ -984,7 +985,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
984 return ret; 985 return ret;
985} 986}
986 987
987static int netem_init(struct Qdisc *sch, struct nlattr *opt) 988static int netem_init(struct Qdisc *sch, struct nlattr *opt,
989 struct netlink_ext_ack *extack)
988{ 990{
989 struct netem_sched_data *q = qdisc_priv(sch); 991 struct netem_sched_data *q = qdisc_priv(sch);
990 int ret; 992 int ret;
@@ -995,7 +997,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
995 return -EINVAL; 997 return -EINVAL;
996 998
997 q->loss_model = CLG_RANDOM; 999 q->loss_model = CLG_RANDOM;
998 ret = netem_change(sch, opt); 1000 ret = netem_change(sch, opt, extack);
999 if (ret) 1001 if (ret)
1000 pr_info("netem: change failed\n"); 1002 pr_info("netem: change failed\n");
1001 return ret; 1003 return ret;
@@ -1157,7 +1159,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
1157} 1159}
1158 1160
1159static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1161static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1160 struct Qdisc **old) 1162 struct Qdisc **old, struct netlink_ext_ack *extack)
1161{ 1163{
1162 struct netem_sched_data *q = qdisc_priv(sch); 1164 struct netem_sched_data *q = qdisc_priv(sch);
1163 1165
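One substantive fix hides among the netem extack changes: the reordering in tabledist(). rnd is a u32 while mu is s64 and sigma s32, so in the old form the subtraction ran in 32-bit unsigned arithmetic and wrapped whenever the modulus result was below sigma; adding mu first widens the expression to s64 before sigma is subtracted (this reading of the motivation is inferred from the types, the patch carries no comment). Restated standalone, with an illustrative function name:

    static s64 uniform_sample(u32 rnd, s64 mu, s32 sigma)
    {
            /* (rnd % (2 * sigma)) is u32; adding the s64 mu first widens
             * the arithmetic, so subtracting sigma can go negative safely */
            return ((rnd % (2 * sigma)) + mu) - sigma;
    }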
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 776c694c77c7..18d30bb86881 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -181,7 +181,8 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
181 [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, 181 [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
182}; 182};
183 183
184static int pie_change(struct Qdisc *sch, struct nlattr *opt) 184static int pie_change(struct Qdisc *sch, struct nlattr *opt,
185 struct netlink_ext_ack *extack)
185{ 186{
186 struct pie_sched_data *q = qdisc_priv(sch); 187 struct pie_sched_data *q = qdisc_priv(sch);
187 struct nlattr *tb[TCA_PIE_MAX + 1]; 188 struct nlattr *tb[TCA_PIE_MAX + 1];
@@ -439,7 +440,8 @@ static void pie_timer(struct timer_list *t)
439 440
440} 441}
441 442
442static int pie_init(struct Qdisc *sch, struct nlattr *opt) 443static int pie_init(struct Qdisc *sch, struct nlattr *opt,
444 struct netlink_ext_ack *extack)
443{ 445{
444 struct pie_sched_data *q = qdisc_priv(sch); 446 struct pie_sched_data *q = qdisc_priv(sch);
445 447
@@ -451,7 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
451 timer_setup(&q->adapt_timer, pie_timer, 0); 453 timer_setup(&q->adapt_timer, pie_timer, 0);
452 454
453 if (opt) { 455 if (opt) {
454 int err = pie_change(sch, opt); 456 int err = pie_change(sch, opt, extack);
455 457
456 if (err) 458 if (err)
457 return err; 459 return err;
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 1c6cbab3e7b9..5619d2eb17b6 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -123,7 +123,8 @@ static struct sk_buff *plug_dequeue(struct Qdisc *sch)
123 return qdisc_dequeue_head(sch); 123 return qdisc_dequeue_head(sch);
124} 124}
125 125
126static int plug_init(struct Qdisc *sch, struct nlattr *opt) 126static int plug_init(struct Qdisc *sch, struct nlattr *opt,
127 struct netlink_ext_ack *extack)
127{ 128{
128 struct plug_sched_data *q = qdisc_priv(sch); 129 struct plug_sched_data *q = qdisc_priv(sch);
129 130
@@ -158,7 +159,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
158 * command is received (just act as a pass-thru queue). 159 * command is received (just act as a pass-thru queue).
159 * TCQ_PLUG_LIMIT: Increase/decrease queue size 160 * TCQ_PLUG_LIMIT: Increase/decrease queue size
160 */ 161 */
161static int plug_change(struct Qdisc *sch, struct nlattr *opt) 162static int plug_change(struct Qdisc *sch, struct nlattr *opt,
163 struct netlink_ext_ack *extack)
162{ 164{
163 struct plug_sched_data *q = qdisc_priv(sch); 165 struct plug_sched_data *q = qdisc_priv(sch);
164 struct tc_plug_qopt *msg; 166 struct tc_plug_qopt *msg;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2c79559a0d31..efbf51f35778 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch)
142 sch->q.qlen = 0; 142 sch->q.qlen = 0;
143} 143}
144 144
145static int prio_offload(struct Qdisc *sch, bool enable)
146{
147 struct prio_sched_data *q = qdisc_priv(sch);
148 struct net_device *dev = qdisc_dev(sch);
149 struct tc_prio_qopt_offload opt = {
150 .handle = sch->handle,
151 .parent = sch->parent,
152 };
153
154 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
155 return -EOPNOTSUPP;
156
157 if (enable) {
158 opt.command = TC_PRIO_REPLACE;
159 opt.replace_params.bands = q->bands;
160 memcpy(&opt.replace_params.priomap, q->prio2band,
161 TC_PRIO_MAX + 1);
162 opt.replace_params.qstats = &sch->qstats;
163 } else {
164 opt.command = TC_PRIO_DESTROY;
165 }
166
167 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
168}
169
145static void 170static void
146prio_destroy(struct Qdisc *sch) 171prio_destroy(struct Qdisc *sch)
147{ 172{
@@ -149,11 +174,13 @@ prio_destroy(struct Qdisc *sch)
149 struct prio_sched_data *q = qdisc_priv(sch); 174 struct prio_sched_data *q = qdisc_priv(sch);
150 175
151 tcf_block_put(q->block); 176 tcf_block_put(q->block);
177 prio_offload(sch, false);
152 for (prio = 0; prio < q->bands; prio++) 178 for (prio = 0; prio < q->bands; prio++)
153 qdisc_destroy(q->queues[prio]); 179 qdisc_destroy(q->queues[prio]);
154} 180}
155 181
156static int prio_tune(struct Qdisc *sch, struct nlattr *opt) 182static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
183 struct netlink_ext_ack *extack)
157{ 184{
158 struct prio_sched_data *q = qdisc_priv(sch); 185 struct prio_sched_data *q = qdisc_priv(sch);
159 struct Qdisc *queues[TCQ_PRIO_BANDS]; 186 struct Qdisc *queues[TCQ_PRIO_BANDS];
@@ -175,7 +202,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
175 /* Before commit, make sure we can allocate all new qdiscs */ 202 /* Before commit, make sure we can allocate all new qdiscs */
176 for (i = oldbands; i < qopt->bands; i++) { 203 for (i = oldbands; i < qopt->bands; i++) {
177 queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 204 queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
178 TC_H_MAKE(sch->handle, i + 1)); 205 TC_H_MAKE(sch->handle, i + 1),
206 extack);
179 if (!queues[i]) { 207 if (!queues[i]) {
180 while (i > oldbands) 208 while (i > oldbands)
181 qdisc_destroy(queues[--i]); 209 qdisc_destroy(queues[--i]);
@@ -202,10 +230,12 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
202 } 230 }
203 231
204 sch_tree_unlock(sch); 232 sch_tree_unlock(sch);
233 prio_offload(sch, true);
205 return 0; 234 return 0;
206} 235}
207 236
208static int prio_init(struct Qdisc *sch, struct nlattr *opt) 237static int prio_init(struct Qdisc *sch, struct nlattr *opt,
238 struct netlink_ext_ack *extack)
209{ 239{
210 struct prio_sched_data *q = qdisc_priv(sch); 240 struct prio_sched_data *q = qdisc_priv(sch);
211 int err; 241 int err;
@@ -213,11 +243,42 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
213 if (!opt) 243 if (!opt)
214 return -EINVAL; 244 return -EINVAL;
215 245
216 err = tcf_block_get(&q->block, &q->filter_list, sch); 246 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
217 if (err) 247 if (err)
218 return err; 248 return err;
219 249
220 return prio_tune(sch, opt); 250 return prio_tune(sch, opt, extack);
251}
252
253static int prio_dump_offload(struct Qdisc *sch)
254{
255 struct net_device *dev = qdisc_dev(sch);
256 struct tc_prio_qopt_offload hw_stats = {
257 .command = TC_PRIO_STATS,
258 .handle = sch->handle,
259 .parent = sch->parent,
260 {
261 .stats = {
262 .bstats = &sch->bstats,
263 .qstats = &sch->qstats,
264 },
265 },
266 };
267 int err;
268
269 sch->flags &= ~TCQ_F_OFFLOADED;
270 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
271 return 0;
272
273 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
274 &hw_stats);
275 if (err == -EOPNOTSUPP)
276 return 0;
277
278 if (!err)
279 sch->flags |= TCQ_F_OFFLOADED;
280
281 return err;
221} 282}
222 283
223static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) 284static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -225,10 +286,15 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
225 struct prio_sched_data *q = qdisc_priv(sch); 286 struct prio_sched_data *q = qdisc_priv(sch);
226 unsigned char *b = skb_tail_pointer(skb); 287 unsigned char *b = skb_tail_pointer(skb);
227 struct tc_prio_qopt opt; 288 struct tc_prio_qopt opt;
289 int err;
228 290
229 opt.bands = q->bands; 291 opt.bands = q->bands;
230 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); 292 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
231 293
294 err = prio_dump_offload(sch);
295 if (err)
296 goto nla_put_failure;
297
232 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) 298 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
233 goto nla_put_failure; 299 goto nla_put_failure;
234 300
@@ -240,7 +306,7 @@ nla_put_failure:
240} 306}
241 307
242static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 308static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
243 struct Qdisc **old) 309 struct Qdisc **old, struct netlink_ext_ack *extack)
244{ 310{
245 struct prio_sched_data *q = qdisc_priv(sch); 311 struct prio_sched_data *q = qdisc_priv(sch);
246 unsigned long band = arg - 1; 312 unsigned long band = arg - 1;
@@ -327,7 +393,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
327 } 393 }
328} 394}
329 395
330static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) 396static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl,
397 struct netlink_ext_ack *extack)
331{ 398{
332 struct prio_sched_data *q = qdisc_priv(sch); 399 struct prio_sched_data *q = qdisc_priv(sch);
333 400
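Beyond extack, prio gains hardware offload modeled on red's: prio_offload() issues TC_PRIO_REPLACE/TC_PRIO_DESTROY through ndo_setup_tc(), and prio_dump_offload() requests TC_PRIO_STATS, setting TCQ_F_OFFLOADED only when a driver actually answered. A hypothetical driver-side handler consuming the new struct tc_prio_qopt_offload (the foo_* helpers are assumptions; the command and field names come from the hunks above):

    static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
                            void *type_data)
    {
            struct tc_prio_qopt_offload *p = type_data;

            if (type != TC_SETUP_QDISC_PRIO)
                    return -EOPNOTSUPP;

            switch (p->command) {
            case TC_PRIO_REPLACE:
                    return foo_prio_replace(dev, p->handle, &p->replace_params);
            case TC_PRIO_DESTROY:
                    return foo_prio_destroy(dev, p->handle);
            case TC_PRIO_STATS:
                    return foo_prio_stats(dev, p->handle, &p->stats);
            default:
                    return -EOPNOTSUPP;
            }
    }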
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6962b37a3ad3..bb1a9c11fc54 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -402,7 +402,8 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
402} 402}
403 403
404static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 404static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
405 struct nlattr **tca, unsigned long *arg) 405 struct nlattr **tca, unsigned long *arg,
406 struct netlink_ext_ack *extack)
406{ 407{
407 struct qfq_sched *q = qdisc_priv(sch); 408 struct qfq_sched *q = qdisc_priv(sch);
408 struct qfq_class *cl = (struct qfq_class *)*arg; 409 struct qfq_class *cl = (struct qfq_class *)*arg;
@@ -479,8 +480,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
479 cl->common.classid = classid; 480 cl->common.classid = classid;
480 cl->deficit = lmax; 481 cl->deficit = lmax;
481 482
482 cl->qdisc = qdisc_create_dflt(sch->dev_queue, 483 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
483 &pfifo_qdisc_ops, classid); 484 classid, NULL);
484 if (cl->qdisc == NULL) 485 if (cl->qdisc == NULL)
485 cl->qdisc = &noop_qdisc; 486 cl->qdisc = &noop_qdisc;
486 487
@@ -564,7 +565,8 @@ static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
564 return (unsigned long)qfq_find_class(sch, classid); 565 return (unsigned long)qfq_find_class(sch, classid);
565} 566}
566 567
567static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl) 568static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
569 struct netlink_ext_ack *extack)
568{ 570{
569 struct qfq_sched *q = qdisc_priv(sch); 571 struct qfq_sched *q = qdisc_priv(sch);
570 572
@@ -593,13 +595,14 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
593} 595}
594 596
595static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, 597static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
596 struct Qdisc *new, struct Qdisc **old) 598 struct Qdisc *new, struct Qdisc **old,
599 struct netlink_ext_ack *extack)
597{ 600{
598 struct qfq_class *cl = (struct qfq_class *)arg; 601 struct qfq_class *cl = (struct qfq_class *)arg;
599 602
600 if (new == NULL) { 603 if (new == NULL) {
601 new = qdisc_create_dflt(sch->dev_queue, 604 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
602 &pfifo_qdisc_ops, cl->common.classid); 605 cl->common.classid, NULL);
603 if (new == NULL) 606 if (new == NULL)
604 new = &noop_qdisc; 607 new = &noop_qdisc;
605 } 608 }
@@ -1413,14 +1416,15 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1413 qfq_deactivate_class(q, cl); 1416 qfq_deactivate_class(q, cl);
1414} 1417}
1415 1418
1416static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) 1419static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
1420 struct netlink_ext_ack *extack)
1417{ 1421{
1418 struct qfq_sched *q = qdisc_priv(sch); 1422 struct qfq_sched *q = qdisc_priv(sch);
1419 struct qfq_group *grp; 1423 struct qfq_group *grp;
1420 int i, j, err; 1424 int i, j, err;
1421 u32 max_cl_shift, maxbudg_shift, max_classes; 1425 u32 max_cl_shift, maxbudg_shift, max_classes;
1422 1426
1423 err = tcf_block_get(&q->block, &q->filter_list, sch); 1427 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1424 if (err) 1428 if (err)
1425 return err; 1429 return err;
1426 1430
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index f0747eb87dc4..16644b3d2362 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,7 +157,6 @@ static int red_offload(struct Qdisc *sch, bool enable)
157 .handle = sch->handle, 157 .handle = sch->handle,
158 .parent = sch->parent, 158 .parent = sch->parent,
159 }; 159 };
160 int err;
161 160
162 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 161 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
163 return -EOPNOTSUPP; 162 return -EOPNOTSUPP;
@@ -168,18 +167,12 @@ static int red_offload(struct Qdisc *sch, bool enable)
168 opt.set.max = q->parms.qth_max >> q->parms.Wlog; 167 opt.set.max = q->parms.qth_max >> q->parms.Wlog;
169 opt.set.probability = q->parms.max_P; 168 opt.set.probability = q->parms.max_P;
170 opt.set.is_ecn = red_use_ecn(q); 169 opt.set.is_ecn = red_use_ecn(q);
170 opt.set.qstats = &sch->qstats;
171 } else { 171 } else {
172 opt.command = TC_RED_DESTROY; 172 opt.command = TC_RED_DESTROY;
173 } 173 }
174 174
175 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); 175 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
176
177 if (!err && enable)
178 sch->flags |= TCQ_F_OFFLOADED;
179 else
180 sch->flags &= ~TCQ_F_OFFLOADED;
181
182 return err;
183} 176}
184 177
185static void red_destroy(struct Qdisc *sch) 178static void red_destroy(struct Qdisc *sch)
@@ -197,7 +190,8 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
197 [TCA_RED_MAX_P] = { .type = NLA_U32 }, 190 [TCA_RED_MAX_P] = { .type = NLA_U32 },
198}; 191};
199 192
200static int red_change(struct Qdisc *sch, struct nlattr *opt) 193static int red_change(struct Qdisc *sch, struct nlattr *opt,
194 struct netlink_ext_ack *extack)
201{ 195{
202 struct red_sched_data *q = qdisc_priv(sch); 196 struct red_sched_data *q = qdisc_priv(sch);
203 struct nlattr *tb[TCA_RED_MAX + 1]; 197 struct nlattr *tb[TCA_RED_MAX + 1];
@@ -224,7 +218,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
224 return -EINVAL; 218 return -EINVAL;
225 219
226 if (ctl->limit > 0) { 220 if (ctl->limit > 0) {
227 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); 221 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
222 extack);
228 if (IS_ERR(child)) 223 if (IS_ERR(child))
229 return PTR_ERR(child); 224 return PTR_ERR(child);
230 } 225 }
@@ -272,14 +267,15 @@ static inline void red_adaptative_timer(struct timer_list *t)
272 spin_unlock(root_lock); 267 spin_unlock(root_lock);
273} 268}
274 269
275static int red_init(struct Qdisc *sch, struct nlattr *opt) 270static int red_init(struct Qdisc *sch, struct nlattr *opt,
271 struct netlink_ext_ack *extack)
276{ 272{
277 struct red_sched_data *q = qdisc_priv(sch); 273 struct red_sched_data *q = qdisc_priv(sch);
278 274
279 q->qdisc = &noop_qdisc; 275 q->qdisc = &noop_qdisc;
280 q->sch = sch; 276 q->sch = sch;
281 timer_setup(&q->adapt_timer, red_adaptative_timer, 0); 277 timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
282 return red_change(sch, opt); 278 return red_change(sch, opt, extack);
283} 279}
284 280
285static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) 281static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
@@ -294,12 +290,22 @@ static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
294 .stats.qstats = &sch->qstats, 290 .stats.qstats = &sch->qstats,
295 }, 291 },
296 }; 292 };
293 int err;
297 294
298 if (!(sch->flags & TCQ_F_OFFLOADED)) 295 sch->flags &= ~TCQ_F_OFFLOADED;
296
297 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
299 return 0; 298 return 0;
300 299
301 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, 300 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
302 &hw_stats); 301 &hw_stats);
302 if (err == -EOPNOTSUPP)
303 return 0;
304
305 if (!err)
306 sch->flags |= TCQ_F_OFFLOADED;
307
308 return err;
303} 309}
304 310
305static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 311static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -317,7 +323,6 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
317 }; 323 };
318 int err; 324 int err;
319 325
320 sch->qstats.backlog = q->qdisc->qstats.backlog;
321 err = red_dump_offload_stats(sch, &opt); 326 err = red_dump_offload_stats(sch, &opt);
322 if (err) 327 if (err)
323 goto nla_put_failure; 328 goto nla_put_failure;
@@ -339,32 +344,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
339{ 344{
340 struct red_sched_data *q = qdisc_priv(sch); 345 struct red_sched_data *q = qdisc_priv(sch);
341 struct net_device *dev = qdisc_dev(sch); 346 struct net_device *dev = qdisc_dev(sch);
342 struct tc_red_xstats st = { 347 struct tc_red_xstats st = {0};
343 .early = q->stats.prob_drop + q->stats.forced_drop,
344 .pdrop = q->stats.pdrop,
345 .other = q->stats.other,
346 .marked = q->stats.prob_mark + q->stats.forced_mark,
347 };
348 348
349 if (sch->flags & TCQ_F_OFFLOADED) { 349 if (sch->flags & TCQ_F_OFFLOADED) {
350 struct red_stats hw_stats = {0};
351 struct tc_red_qopt_offload hw_stats_request = { 350 struct tc_red_qopt_offload hw_stats_request = {
352 .command = TC_RED_XSTATS, 351 .command = TC_RED_XSTATS,
353 .handle = sch->handle, 352 .handle = sch->handle,
354 .parent = sch->parent, 353 .parent = sch->parent,
355 { 354 {
356 .xstats = &hw_stats, 355 .xstats = &q->stats,
357 }, 356 },
358 }; 357 };
359 if (!dev->netdev_ops->ndo_setup_tc(dev, 358 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
360 TC_SETUP_QDISC_RED, 359 &hw_stats_request);
361 &hw_stats_request)) {
362 st.early += hw_stats.prob_drop + hw_stats.forced_drop;
363 st.pdrop += hw_stats.pdrop;
364 st.other += hw_stats.other;
365 st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
366 }
367 } 360 }
361 st.early = q->stats.prob_drop + q->stats.forced_drop;
362 st.pdrop = q->stats.pdrop;
363 st.other = q->stats.other;
364 st.marked = q->stats.prob_mark + q->stats.forced_mark;
368 365
369 return gnet_stats_copy_app(d, &st, sizeof(st)); 366 return gnet_stats_copy_app(d, &st, sizeof(st));
370} 367}
@@ -380,7 +377,7 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl,
380} 377}
381 378
382static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 379static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
383 struct Qdisc **old) 380 struct Qdisc **old, struct netlink_ext_ack *extack)
384{ 381{
385 struct red_sched_data *q = qdisc_priv(sch); 382 struct red_sched_data *q = qdisc_priv(sch);
386 383
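The red changes refactor rather than add: the TCQ_F_OFFLOADED decision moves out of red_offload() into the dump path, so the flag now records whether the most recent stats query actually reached hardware, with -EOPNOTSUPP treated as 'software only' rather than as an error. Hardware xstats are also written straight into q->stats (note .xstats = &q->stats), so the software totals already include them by the time red_dump_stats() fills tc_red_xstats. The dump-time decision, condensed from the hunk:

    sch->flags &= ~TCQ_F_OFFLOADED;
    if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
            return 0;                       /* no offload path: stay software */

    err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &hw_stats);
    if (err == -EOPNOTSUPP)
            return 0;                       /* driver declined: stay software */
    if (!err)
            sch->flags |= TCQ_F_OFFLOADED;  /* stats really came from hardware */
    return err;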
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0678debdd856..7cbdad8419b7 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -488,7 +488,8 @@ static const struct tc_sfb_qopt sfb_default_ops = {
488 .penalty_burst = 20, 488 .penalty_burst = 20,
489}; 489};
490 490
491static int sfb_change(struct Qdisc *sch, struct nlattr *opt) 491static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
492 struct netlink_ext_ack *extack)
492{ 493{
493 struct sfb_sched_data *q = qdisc_priv(sch); 494 struct sfb_sched_data *q = qdisc_priv(sch);
494 struct Qdisc *child; 495 struct Qdisc *child;
@@ -512,7 +513,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
512 if (limit == 0) 513 if (limit == 0)
513 limit = qdisc_dev(sch)->tx_queue_len; 514 limit = qdisc_dev(sch)->tx_queue_len;
514 515
515 child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); 516 child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit, extack);
516 if (IS_ERR(child)) 517 if (IS_ERR(child))
517 return PTR_ERR(child); 518 return PTR_ERR(child);
518 519
@@ -549,17 +550,18 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
549 return 0; 550 return 0;
550} 551}
551 552
552static int sfb_init(struct Qdisc *sch, struct nlattr *opt) 553static int sfb_init(struct Qdisc *sch, struct nlattr *opt,
554 struct netlink_ext_ack *extack)
553{ 555{
554 struct sfb_sched_data *q = qdisc_priv(sch); 556 struct sfb_sched_data *q = qdisc_priv(sch);
555 int err; 557 int err;
556 558
557 err = tcf_block_get(&q->block, &q->filter_list, sch); 559 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
558 if (err) 560 if (err)
559 return err; 561 return err;
560 562
561 q->qdisc = &noop_qdisc; 563 q->qdisc = &noop_qdisc;
562 return sfb_change(sch, opt); 564 return sfb_change(sch, opt, extack);
563} 565}
564 566
565static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) 567static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -615,7 +617,7 @@ static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
615} 617}
616 618
617static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 619static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
618 struct Qdisc **old) 620 struct Qdisc **old, struct netlink_ext_ack *extack)
619{ 621{
620 struct sfb_sched_data *q = qdisc_priv(sch); 622 struct sfb_sched_data *q = qdisc_priv(sch);
621 623
@@ -643,7 +645,8 @@ static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
643} 645}
644 646
645static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 647static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
646 struct nlattr **tca, unsigned long *arg) 648 struct nlattr **tca, unsigned long *arg,
649 struct netlink_ext_ack *extack)
647{ 650{
648 return -ENOSYS; 651 return -ENOSYS;
649} 652}
@@ -665,7 +668,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
665 } 668 }
666} 669}
667 670
668static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl) 671static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl,
672 struct netlink_ext_ack *extack)
669{ 673{
670 struct sfb_sched_data *q = qdisc_priv(sch); 674 struct sfb_sched_data *q = qdisc_priv(sch);
671 675
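red above, sfb here, and tbf below all call fifo_create_dflt(), which likewise grows the extack parameter so a failed bfifo/pfifo child creation can be explained to the caller. The prototype as implied by the three call sites (the declaring header is not shown in this diff, so the exact types are an assumption):

    struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
                                   unsigned int limit,
                                   struct netlink_ext_ack *extack);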
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 930e5bd26d3d..2f2678197760 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -721,7 +721,8 @@ static void sfq_destroy(struct Qdisc *sch)
721 kfree(q->red_parms); 721 kfree(q->red_parms);
722} 722}
723 723
724static int sfq_init(struct Qdisc *sch, struct nlattr *opt) 724static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
725 struct netlink_ext_ack *extack)
725{ 726{
726 struct sfq_sched_data *q = qdisc_priv(sch); 727 struct sfq_sched_data *q = qdisc_priv(sch);
727 int i; 728 int i;
@@ -730,7 +731,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
730 q->sch = sch; 731 q->sch = sch;
731 timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); 732 timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
732 733
733 err = tcf_block_get(&q->block, &q->filter_list, sch); 734 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
734 if (err) 735 if (err)
735 return err; 736 return err;
736 737
@@ -836,7 +837,8 @@ static void sfq_unbind(struct Qdisc *q, unsigned long cl)
836{ 837{
837} 838}
838 839
839static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl) 840static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
841 struct netlink_ext_ack *extack)
840{ 842{
841 struct sfq_sched_data *q = qdisc_priv(sch); 843 struct sfq_sched_data *q = qdisc_priv(sch);
842 844
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 120f4f365967..229172d509cc 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -142,16 +142,6 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
142 return len; 142 return len;
143} 143}
144 144
145/*
146 * Return length of individual segments of a gso packet,
147 * including all headers (MAC, IP, TCP/UDP)
148 */
149static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
150{
151 unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
152 return hdr_len + skb_gso_transport_seglen(skb);
153}
154
155/* GSO packet is too big, segment it so that tbf can transmit 145/* GSO packet is too big, segment it so that tbf can transmit
156 * each segment in time 146 * each segment in time
157 */ 147 */
@@ -302,7 +292,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
302 [TCA_TBF_PBURST] = { .type = NLA_U32 }, 292 [TCA_TBF_PBURST] = { .type = NLA_U32 },
303}; 293};
304 294
305static int tbf_change(struct Qdisc *sch, struct nlattr *opt) 295static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
296 struct netlink_ext_ack *extack)
306{ 297{
307 int err; 298 int err;
308 struct tbf_sched_data *q = qdisc_priv(sch); 299 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -326,11 +317,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
326 qopt = nla_data(tb[TCA_TBF_PARMS]); 317 qopt = nla_data(tb[TCA_TBF_PARMS]);
327 if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) 318 if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
328 qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, 319 qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
329 tb[TCA_TBF_RTAB])); 320 tb[TCA_TBF_RTAB],
321 NULL));
330 322
331 if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) 323 if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
332 qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, 324 qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
333 tb[TCA_TBF_PTAB])); 325 tb[TCA_TBF_PTAB],
326 NULL));
334 327
335 buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); 328 buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
336 mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); 329 mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
@@ -383,7 +376,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
383 if (err) 376 if (err)
384 goto done; 377 goto done;
385 } else if (qopt->limit > 0) { 378 } else if (qopt->limit > 0) {
386 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); 379 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
380 extack);
387 if (IS_ERR(child)) { 381 if (IS_ERR(child)) {
388 err = PTR_ERR(child); 382 err = PTR_ERR(child);
389 goto done; 383 goto done;
@@ -421,19 +415,20 @@ done:
421 return err; 415 return err;
422} 416}
423 417
424static int tbf_init(struct Qdisc *sch, struct nlattr *opt) 418static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
419 struct netlink_ext_ack *extack)
425{ 420{
426 struct tbf_sched_data *q = qdisc_priv(sch); 421 struct tbf_sched_data *q = qdisc_priv(sch);
427 422
428 qdisc_watchdog_init(&q->watchdog, sch); 423 qdisc_watchdog_init(&q->watchdog, sch);
429 q->qdisc = &noop_qdisc; 424 q->qdisc = &noop_qdisc;
430 425
431 if (opt == NULL) 426 if (!opt)
432 return -EINVAL; 427 return -EINVAL;
433 428
434 q->t_c = ktime_get_ns(); 429 q->t_c = ktime_get_ns();
435 430
436 return tbf_change(sch, opt); 431 return tbf_change(sch, opt, extack);
437} 432}
438 433
439static void tbf_destroy(struct Qdisc *sch) 434static void tbf_destroy(struct Qdisc *sch)
@@ -494,7 +489,7 @@ static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
494} 489}
495 490
496static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 491static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
497 struct Qdisc **old) 492 struct Qdisc **old, struct netlink_ext_ack *extack)
498{ 493{
499 struct tbf_sched_data *q = qdisc_priv(sch); 494 struct tbf_sched_data *q = qdisc_priv(sch);
500 495
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 9fe6b427afed..93f04cf5cac1 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -167,7 +167,8 @@ teql_destroy(struct Qdisc *sch)
167 } 167 }
168} 168}
169 169
170static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) 170static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
171 struct netlink_ext_ack *extack)
171{ 172{
172 struct net_device *dev = qdisc_dev(sch); 173 struct net_device *dev = qdisc_dev(sch);
173 struct teql_master *m = (struct teql_master *)sch->ops; 174 struct teql_master *m = (struct teql_master *)sch->ops;
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index d9c04dc1b3f3..c740b189d4ba 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,18 +37,6 @@ menuconfig IP_SCTP
37 37
38if IP_SCTP 38if IP_SCTP
39 39
40config NET_SCTPPROBE
41 tristate "SCTP: Association probing"
42 depends on PROC_FS && KPROBES
43 ---help---
44 This module allows for capturing the changes to SCTP association
45 state in response to incoming packets. It is used for debugging
46 SCTP congestion control algorithms. If you don't understand
47 what was just said, you don't need it: say N.
48
49 To compile this code as a module, choose M here: the
50 module will be called sctp_probe.
51
52config SCTP_DBG_OBJCNT 40config SCTP_DBG_OBJCNT
53 bool "SCTP: Debug object counts" 41 bool "SCTP: Debug object counts"
54 depends on PROC_FS 42 depends on PROC_FS
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 1ca84a288443..6776582ec449 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -4,7 +4,6 @@
4# 4#
5 5
6obj-$(CONFIG_IP_SCTP) += sctp.o 6obj-$(CONFIG_IP_SCTP) += sctp.o
7obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
8obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o 7obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o
9 8
10sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ 9sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
@@ -14,9 +13,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
14 tsnmap.o bind_addr.o socket.o primitive.o \ 13 tsnmap.o bind_addr.o socket.o primitive.o \
15 output.o input.o debug.o stream.o auth.o \ 14 output.o input.o debug.o stream.o auth.o \
16 offload.o stream_sched.o stream_sched_prio.o \ 15 offload.o stream_sched.o stream_sched_prio.o \
17 stream_sched_rr.o 16 stream_sched_rr.o stream_interleave.o
18
19sctp_probe-y := probe.o
20 17
21sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o 18sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
22sctp-$(CONFIG_PROC_FS) += proc.o 19sctp-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 69394f4d6091..837806dd5799 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -861,7 +861,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
861 event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 861 event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
862 0, spc_state, error, GFP_ATOMIC); 862 0, spc_state, error, GFP_ATOMIC);
863 if (event) 863 if (event)
864 sctp_ulpq_tail_event(&asoc->ulpq, event); 864 asoc->stream.si->enqueue_event(&asoc->ulpq, event);
865 } 865 }
866 866
867 /* Select new active and retran paths. */ 867 /* Select new active and retran paths. */
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7f8baa48e7c2..991a530c6b31 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -124,7 +124,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
124 ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent, 124 ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
125 error, GFP_ATOMIC); 125 error, GFP_ATOMIC);
126 if (ev) 126 if (ev)
127 sctp_ulpq_tail_event(&asoc->ulpq, ev); 127 asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
128 } 128 }
129 129
130 sctp_chunk_put(chunk); 130 sctp_chunk_put(chunk);
@@ -191,7 +191,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
191 */ 191 */
192 max_data = asoc->pathmtu - 192 max_data = asoc->pathmtu -
193 sctp_sk(asoc->base.sk)->pf->af->net_header_len - 193 sctp_sk(asoc->base.sk)->pf->af->net_header_len -
194 sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); 194 sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream);
195 max_data = SCTP_TRUNC4(max_data); 195 max_data = SCTP_TRUNC4(max_data);
196 196
197 /* If the peer requested that we authenticate DATA chunks 197 /* If the peer requested that we authenticate DATA chunks
@@ -264,8 +264,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
264 frag |= SCTP_DATA_SACK_IMM; 264 frag |= SCTP_DATA_SACK_IMM;
265 } 265 }
266 266
267 chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 267 chunk = asoc->stream.si->make_datafrag(asoc, sinfo, len, frag,
268 0, GFP_KERNEL); 268 GFP_KERNEL);
269 if (!chunk) { 269 if (!chunk) {
270 err = -ENOMEM; 270 err = -ENOMEM;
271 goto errout; 271 goto errout;
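
The calls through asoc->stream.si in this and the following sctp hunks go via the new stream-interleave operations table: one instance implements classic DATA/FORWARD-TSN behaviour, the other the I-DATA/I-FORWARD-TSN variants. A hedged sketch of its shape, limited to the callbacks this diff actually uses (the full definition belongs to the stream interleave patches, which are not shown here):

struct sctp_stream_interleave {
        __u16 data_chunk_len;   /* sizeof(DATA) vs. sizeof(I-DATA) header */
        /* chunk-side operations */
        struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc,
                                            const struct sctp_sndrcvinfo *sinfo,
                                            int len, __u8 flags, gfp_t gfp);
        void (*assign_number)(struct sctp_chunk *chunk);  /* SSN vs. MID */
        bool (*validate_data)(struct sctp_chunk *chunk);
        /* ulpq-side operations */
        int (*enqueue_event)(struct sctp_ulpq *ulpq, struct sctp_ulpevent *ev);
        void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn);
        /* ... plus ulpevent_data, renege_events, start_pd, abort_pd,
         * validate_ftsn, report_ftsn and handle_ftsn, used in later hunks */
};
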
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ee1e601a0b11..8b3146816519 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -232,7 +232,7 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
232{ 232{
233 ep->base.dead = true; 233 ep->base.dead = true;
234 234
235 ep->base.sk->sk_state = SCTP_SS_CLOSED; 235 inet_sk_set_state(ep->base.sk, SCTP_SS_CLOSED);
236 236
237 /* Unlink this endpoint, so we can't find it again! */ 237 /* Unlink this endpoint, so we can't find it again! */
238 sctp_unhash_endpoint(ep); 238 sctp_unhash_endpoint(ep);
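
Swapping the raw sk->sk_state store for inet_sk_set_state() here (and in sm_sideeffect.c and socket.c below) is not cosmetic: the helper fires the inet_sock_set_state tracepoint before updating the field, making SCTP socket-state transitions observable the way TCP's already are. Roughly what the helper does, per net/ipv4/af_inet.c in this kernel (shown as a sketch):

void inet_sk_set_state(struct sock *sk, int state)
{
        trace_inet_sock_set_state(sk, sk->sk_state, state);
        sk->sk_state = state;
}
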
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 5d4c15bf66d2..e35d4f73d2df 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -326,8 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
326 final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); 326 final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
327 bdst = ip6_dst_lookup_flow(sk, fl6, final_p); 327 bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
328 328
329 if (!IS_ERR(bdst) && 329 if (IS_ERR(bdst))
330 ipv6_chk_addr(dev_net(bdst->dev), 330 continue;
331
332 if (ipv6_chk_addr(dev_net(bdst->dev),
331 &laddr->a.v6.sin6_addr, bdst->dev, 1)) { 333 &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
332 if (!IS_ERR_OR_NULL(dst)) 334 if (!IS_ERR_OR_NULL(dst))
333 dst_release(dst); 335 dst_release(dst);
@@ -336,8 +338,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
336 } 338 }
337 339
338 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); 340 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
339 if (matchlen > bmatchlen) 341 if (matchlen > bmatchlen) {
342 dst_release(bdst);
340 continue; 343 continue;
344 }
341 345
342 if (!IS_ERR_OR_NULL(dst)) 346 if (!IS_ERR_OR_NULL(dst))
343 dst_release(dst); 347 dst_release(dst);
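
Both changes in this hunk are reference-count fixes: an ERR_PTR() result must not be fed to ipv6_chk_addr(), and a candidate route that loses the match-length comparison has to be released before the loop continues, otherwise its refcount leaks. Reduced to a sketch (lookup_dst() and better_match() are hypothetical stand-ins for the surrounding logic):

struct dst_entry *best = NULL, *cand;

list_for_each_entry(laddr, &bp->address_list, list) {
        cand = lookup_dst(laddr);       /* may return ERR_PTR() */
        if (IS_ERR(cand))
                continue;               /* no reference to drop */
        if (!better_match(cand, best)) {
                dst_release(cand);      /* drop the loser's reference */
                continue;
        }
        if (best)
                dst_release(best);      /* replace the previous best */
        best = cand;
}

The IPv4 walk in net/sctp/protocol.c further down gets the same discipline applied.
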
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 4a865cd06d76..01a26ee051e3 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -313,6 +313,7 @@ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
313 /* We believe that this chunk is OK to add to the packet */ 313 /* We believe that this chunk is OK to add to the packet */
314 switch (chunk->chunk_hdr->type) { 314 switch (chunk->chunk_hdr->type) {
315 case SCTP_CID_DATA: 315 case SCTP_CID_DATA:
316 case SCTP_CID_I_DATA:
316 /* Account for the data being in the packet */ 317 /* Account for the data being in the packet */
317 sctp_packet_append_data(packet, chunk); 318 sctp_packet_append_data(packet, chunk);
318 /* Disallow SACK bundling after DATA. */ 319 /* Disallow SACK bundling after DATA. */
@@ -724,7 +725,7 @@ static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
724 * or delay in hopes of bundling a full sized packet. 725 * or delay in hopes of bundling a full sized packet.
725 */ 726 */
726 if (chunk->skb->len + q->out_qlen > transport->pathmtu - 727 if (chunk->skb->len + q->out_qlen > transport->pathmtu -
727 packet->overhead - sizeof(struct sctp_data_chunk) - 4) 728 packet->overhead - sctp_datachk_len(&chunk->asoc->stream) - 4)
728 /* Enough data queued to fill a packet */ 729 /* Enough data queued to fill a packet */
729 return SCTP_XMIT_OK; 730 return SCTP_XMIT_OK;
730 731
@@ -759,7 +760,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
759 760
760 asoc->peer.rwnd = rwnd; 761 asoc->peer.rwnd = rwnd;
761 sctp_chunk_assign_tsn(chunk); 762 sctp_chunk_assign_tsn(chunk);
762 sctp_chunk_assign_ssn(chunk); 763 asoc->stream.si->assign_number(chunk);
763} 764}
764 765
765static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, 766static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c4ec99b20150..f211b3db6a35 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -67,8 +67,6 @@ static void sctp_mark_missing(struct sctp_outq *q,
67 __u32 highest_new_tsn, 67 __u32 highest_new_tsn,
68 int count_of_newacks); 68 int count_of_newacks);
69 69
70static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
71
72static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); 70static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
73 71
74/* Add data to the front of the queue. */ 72/* Add data to the front of the queue. */
@@ -591,7 +589,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
591 * following the procedures outlined in C1 - C5. 589 * following the procedures outlined in C1 - C5.
592 */ 590 */
593 if (reason == SCTP_RTXR_T3_RTX) 591 if (reason == SCTP_RTXR_T3_RTX)
594 sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); 592 q->asoc->stream.si->generate_ftsn(q, q->asoc->ctsn_ack_point);
595 593
596 /* Flush the queues only on timeout, since fast_rtx is only 594 /* Flush the queues only on timeout, since fast_rtx is only
597 * triggered during sack processing and the queue 595 * triggered during sack processing and the queue
@@ -942,6 +940,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
942 case SCTP_CID_ECN_ECNE: 940 case SCTP_CID_ECN_ECNE:
943 case SCTP_CID_ASCONF: 941 case SCTP_CID_ASCONF:
944 case SCTP_CID_FWD_TSN: 942 case SCTP_CID_FWD_TSN:
943 case SCTP_CID_I_FWD_TSN:
945 case SCTP_CID_RECONF: 944 case SCTP_CID_RECONF:
946 status = sctp_packet_transmit_chunk(packet, chunk, 945 status = sctp_packet_transmit_chunk(packet, chunk,
947 one_packet, gfp); 946 one_packet, gfp);
@@ -956,7 +955,8 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
956 * sender MUST assure that at least one T3-rtx 955 * sender MUST assure that at least one T3-rtx
957 * timer is running. 956 * timer is running.
958 */ 957 */
959 if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { 958 if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN ||
959 chunk->chunk_hdr->type == SCTP_CID_I_FWD_TSN) {
960 sctp_transport_reset_t3_rtx(transport); 960 sctp_transport_reset_t3_rtx(transport);
961 transport->last_time_sent = jiffies; 961 transport->last_time_sent = jiffies;
962 } 962 }
@@ -1372,7 +1372,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
1372 1372
1373 asoc->peer.rwnd = sack_a_rwnd; 1373 asoc->peer.rwnd = sack_a_rwnd;
1374 1374
1375 sctp_generate_fwdtsn(q, sack_ctsn); 1375 asoc->stream.si->generate_ftsn(q, sack_ctsn);
1376 1376
1377 pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn); 1377 pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn);
1378 pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, " 1378 pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, "
@@ -1795,7 +1795,7 @@ static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist,
1795} 1795}
1796 1796
1797/* Create and add a fwdtsn chunk to the outq's control queue if needed. */ 1797/* Create and add a fwdtsn chunk to the outq's control queue if needed. */
1798static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) 1798void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
1799{ 1799{
1800 struct sctp_association *asoc = q->asoc; 1800 struct sctp_association *asoc = q->asoc;
1801 struct sctp_chunk *ftsn_chunk = NULL; 1801 struct sctp_chunk *ftsn_chunk = NULL;
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
deleted file mode 100644
index 1280f85a598d..000000000000
--- a/net/sctp/probe.c
+++ /dev/null
@@ -1,244 +0,0 @@
1/*
2 * sctp_probe - Observe the SCTP flow with kprobes.
3 *
4 * The idea for this came from Werner Almesberger's umlsim
5 * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
6 *
7 * Modified for SCTP from Stephen Hemminger's code
8 * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
27#include <linux/kernel.h>
28#include <linux/kprobes.h>
29#include <linux/socket.h>
30#include <linux/sctp.h>
31#include <linux/proc_fs.h>
32#include <linux/vmalloc.h>
33#include <linux/module.h>
34#include <linux/kfifo.h>
35#include <linux/time.h>
36#include <net/net_namespace.h>
37
38#include <net/sctp/sctp.h>
39#include <net/sctp/sm.h>
40
41MODULE_SOFTDEP("pre: sctp");
42MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
43MODULE_DESCRIPTION("SCTP snooper");
44MODULE_LICENSE("GPL");
45
46static int port __read_mostly = 0;
47MODULE_PARM_DESC(port, "Port to match (0=all)");
48module_param(port, int, 0);
49
50static unsigned int fwmark __read_mostly = 0;
51MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
52module_param(fwmark, uint, 0);
53
54static int bufsize __read_mostly = 64 * 1024;
55MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
56module_param(bufsize, int, 0);
57
58static int full __read_mostly = 1;
59MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
60module_param(full, int, 0);
61
62static const char procname[] = "sctpprobe";
63
64static struct {
65 struct kfifo fifo;
66 spinlock_t lock;
67 wait_queue_head_t wait;
68 struct timespec64 tstart;
69} sctpw;
70
71static __printf(1, 2) void printl(const char *fmt, ...)
72{
73 va_list args;
74 int len;
75 char tbuf[256];
76
77 va_start(args, fmt);
78 len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
79 va_end(args);
80
81 kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
82 wake_up(&sctpw.wait);
83}
84
85static int sctpprobe_open(struct inode *inode, struct file *file)
86{
87 kfifo_reset(&sctpw.fifo);
88 ktime_get_ts64(&sctpw.tstart);
89
90 return 0;
91}
92
93static ssize_t sctpprobe_read(struct file *file, char __user *buf,
94 size_t len, loff_t *ppos)
95{
96 int error = 0, cnt = 0;
97 unsigned char *tbuf;
98
99 if (!buf)
100 return -EINVAL;
101
102 if (len == 0)
103 return 0;
104
105 tbuf = vmalloc(len);
106 if (!tbuf)
107 return -ENOMEM;
108
109 error = wait_event_interruptible(sctpw.wait,
110 kfifo_len(&sctpw.fifo) != 0);
111 if (error)
112 goto out_free;
113
114 cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
115 error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
116
117out_free:
118 vfree(tbuf);
119
120 return error ? error : cnt;
121}
122
123static const struct file_operations sctpprobe_fops = {
124 .owner = THIS_MODULE,
125 .open = sctpprobe_open,
126 .read = sctpprobe_read,
127 .llseek = noop_llseek,
128};
129
130static enum sctp_disposition jsctp_sf_eat_sack(
131 struct net *net,
132 const struct sctp_endpoint *ep,
133 const struct sctp_association *asoc,
134 const union sctp_subtype type,
135 void *arg,
136 struct sctp_cmd_seq *commands)
137{
138 struct sctp_chunk *chunk = arg;
139 struct sk_buff *skb = chunk->skb;
140 struct sctp_transport *sp;
141 static __u32 lcwnd = 0;
142 struct timespec64 now;
143
144 sp = asoc->peer.primary_path;
145
146 if (((port == 0 && fwmark == 0) ||
147 asoc->peer.port == port ||
148 ep->base.bind_addr.port == port ||
149 (fwmark > 0 && skb->mark == fwmark)) &&
150 (full || sp->cwnd != lcwnd)) {
151 lcwnd = sp->cwnd;
152
153 ktime_get_ts64(&now);
154 now = timespec64_sub(now, sctpw.tstart);
155
156 printl("%lu.%06lu ", (unsigned long) now.tv_sec,
157 (unsigned long) now.tv_nsec / NSEC_PER_USEC);
158
159 printl("%p %5d %5d %5d %8d %5d ", asoc,
160 ep->base.bind_addr.port, asoc->peer.port,
161 asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
162
163 list_for_each_entry(sp, &asoc->peer.transport_addr_list,
164 transports) {
165 if (sp == asoc->peer.primary_path)
166 printl("*");
167
168 printl("%pISc %2u %8u %8u %8u %8u %8u ",
169 &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
170 sp->flight_size, sp->partial_bytes_acked,
171 sp->pathmtu);
172 }
173 printl("\n");
174 }
175
176 jprobe_return();
177 return 0;
178}
179
180static struct jprobe sctp_recv_probe = {
181 .kp = {
182 .symbol_name = "sctp_sf_eat_sack_6_2",
183 },
184 .entry = jsctp_sf_eat_sack,
185};
186
187static __init int sctp_setup_jprobe(void)
188{
189 int ret = register_jprobe(&sctp_recv_probe);
190
191 if (ret) {
192 if (request_module("sctp"))
193 goto out;
194 ret = register_jprobe(&sctp_recv_probe);
195 }
196
197out:
198 return ret;
199}
200
201static __init int sctpprobe_init(void)
202{
203 int ret = -ENOMEM;
204
205 /* Warning: if the function signature of sctp_sf_eat_sack_6_2,
206 * has been changed, you also have to change the signature of
207 * jsctp_sf_eat_sack, otherwise you end up right here!
208 */
209 BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2,
210 jsctp_sf_eat_sack) == 0);
211
212 init_waitqueue_head(&sctpw.wait);
213 spin_lock_init(&sctpw.lock);
214 if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
215 return ret;
216
217 if (!proc_create(procname, S_IRUSR, init_net.proc_net,
218 &sctpprobe_fops))
219 goto free_kfifo;
220
221 ret = sctp_setup_jprobe();
222 if (ret)
223 goto remove_proc;
224
225 pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
226 port, fwmark, bufsize);
227 return 0;
228
229remove_proc:
230 remove_proc_entry(procname, init_net.proc_net);
231free_kfifo:
232 kfifo_free(&sctpw.fifo);
233 return ret;
234}
235
236static __exit void sctpprobe_exit(void)
237{
238 kfifo_free(&sctpw.fifo);
239 remove_proc_entry(procname, init_net.proc_net);
240 unregister_jprobe(&sctp_recv_probe);
241}
242
243module_init(sctpprobe_init);
244module_exit(sctpprobe_exit);
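
net/sctp/probe.c is removed because jprobes are on their way out of the kernel; the replacement is a proper trace event, visible below where sm_statefuns.c gains trace_sctp_probe() in sctp_sf_eat_sack_6_2(). A userspace sketch for turning the new event on (the path assumes tracefs is mounted under /sys/kernel/debug/tracing; error handling trimmed):

#include <fcntl.h>
#include <unistd.h>

static int enable_sctp_probe(void)
{
        int fd = open("/sys/kernel/debug/tracing/events/sctp/sctp_probe/enable",
                      O_WRONLY);

        if (fd < 0)
                return -1;
        write(fd, "1", 1);      /* writing "0" disables the event again */
        return close(fd);
}
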
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 26b4be6b4172..537545ebcb0e 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -95,7 +95,6 @@ static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
95} 95}
96 96
97static const struct file_operations sctp_snmp_seq_fops = { 97static const struct file_operations sctp_snmp_seq_fops = {
98 .owner = THIS_MODULE,
99 .open = sctp_snmp_seq_open, 98 .open = sctp_snmp_seq_open,
100 .read = seq_read, 99 .read = seq_read,
101 .llseek = seq_lseek, 100 .llseek = seq_lseek,
@@ -288,12 +287,8 @@ struct sctp_ht_iter {
288static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) 287static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
289{ 288{
290 struct sctp_ht_iter *iter = seq->private; 289 struct sctp_ht_iter *iter = seq->private;
291 int err = sctp_transport_walk_start(&iter->hti);
292 290
293 if (err) { 291 sctp_transport_walk_start(&iter->hti);
294 iter->start_fail = 1;
295 return ERR_PTR(err);
296 }
297 292
298 iter->start_fail = 0; 293 iter->start_fail = 0;
299 return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); 294 return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6a38c2503649..91813e686c67 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -514,22 +514,20 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
514 if (IS_ERR(rt)) 514 if (IS_ERR(rt))
515 continue; 515 continue;
516 516
517 if (!dst)
518 dst = &rt->dst;
519
520 /* Ensure the src address belongs to the output 517 /* Ensure the src address belongs to the output
521 * interface. 518 * interface.
522 */ 519 */
523 odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr, 520 odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
524 false); 521 false);
525 if (!odev || odev->ifindex != fl4->flowi4_oif) { 522 if (!odev || odev->ifindex != fl4->flowi4_oif) {
526 if (&rt->dst != dst) 523 if (!dst)
524 dst = &rt->dst;
525 else
527 dst_release(&rt->dst); 526 dst_release(&rt->dst);
528 continue; 527 continue;
529 } 528 }
530 529
531 if (dst != &rt->dst) 530 dst_release(dst);
532 dst_release(dst);
533 dst = &rt->dst; 531 dst = &rt->dst;
534 break; 532 break;
535 } 533 }
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 9bf575f2e8ed..d01475f5f710 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
228 struct sctp_inithdr init; 228 struct sctp_inithdr init;
229 union sctp_params addrs; 229 union sctp_params addrs;
230 struct sctp_sock *sp; 230 struct sctp_sock *sp;
231 __u8 extensions[4]; 231 __u8 extensions[5];
232 size_t chunksize; 232 size_t chunksize;
233 __be16 types[2]; 233 __be16 types[2];
234 int num_ext = 0; 234 int num_ext = 0;
@@ -278,6 +278,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
278 if (sp->adaptation_ind) 278 if (sp->adaptation_ind)
279 chunksize += sizeof(aiparam); 279 chunksize += sizeof(aiparam);
280 280
281 if (sp->strm_interleave) {
282 extensions[num_ext] = SCTP_CID_I_DATA;
283 num_ext += 1;
284 }
285
281 chunksize += vparam_len; 286 chunksize += vparam_len;
282 287
283 /* Account for AUTH related parameters */ 288 /* Account for AUTH related parameters */
@@ -392,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
392 struct sctp_inithdr initack; 397 struct sctp_inithdr initack;
393 union sctp_params addrs; 398 union sctp_params addrs;
394 struct sctp_sock *sp; 399 struct sctp_sock *sp;
395 __u8 extensions[4]; 400 __u8 extensions[5];
396 size_t chunksize; 401 size_t chunksize;
397 int num_ext = 0; 402 int num_ext = 0;
398 int cookie_len; 403 int cookie_len;
@@ -442,6 +447,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
442 if (sp->adaptation_ind) 447 if (sp->adaptation_ind)
443 chunksize += sizeof(aiparam); 448 chunksize += sizeof(aiparam);
444 449
450 if (asoc->intl_enable) {
451 extensions[num_ext] = SCTP_CID_I_DATA;
452 num_ext += 1;
453 }
454
445 if (asoc->peer.auth_capable) { 455 if (asoc->peer.auth_capable) {
446 auth_random = (struct sctp_paramhdr *)asoc->c.auth_random; 456 auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
447 chunksize += ntohs(auth_random->length); 457 chunksize += ntohs(auth_random->length);
@@ -711,38 +721,31 @@ nodata:
711/* Make a DATA chunk for the given association from the provided 721/* Make a DATA chunk for the given association from the provided
712 * parameters. However, do not populate the data payload. 722 * parameters. However, do not populate the data payload.
713 */ 723 */
714struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, 724struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
715 const struct sctp_sndrcvinfo *sinfo, 725 const struct sctp_sndrcvinfo *sinfo,
716 int data_len, __u8 flags, __u16 ssn, 726 int len, __u8 flags, gfp_t gfp)
717 gfp_t gfp)
718{ 727{
719 struct sctp_chunk *retval; 728 struct sctp_chunk *retval;
720 struct sctp_datahdr dp; 729 struct sctp_datahdr dp;
721 int chunk_len;
722 730
723 /* We assign the TSN as LATE as possible, not here when 731 /* We assign the TSN as LATE as possible, not here when
724 * creating the chunk. 732 * creating the chunk.
725 */ 733 */
726 dp.tsn = 0; 734 memset(&dp, 0, sizeof(dp));
735 dp.ppid = sinfo->sinfo_ppid;
727 dp.stream = htons(sinfo->sinfo_stream); 736 dp.stream = htons(sinfo->sinfo_stream);
728 dp.ppid = sinfo->sinfo_ppid;
729 737
730 /* Set the flags for an unordered send. */ 738 /* Set the flags for an unordered send. */
731 if (sinfo->sinfo_flags & SCTP_UNORDERED) { 739 if (sinfo->sinfo_flags & SCTP_UNORDERED)
732 flags |= SCTP_DATA_UNORDERED; 740 flags |= SCTP_DATA_UNORDERED;
733 dp.ssn = 0;
734 } else
735 dp.ssn = htons(ssn);
736 741
737 chunk_len = sizeof(dp) + data_len; 742 retval = sctp_make_data(asoc, flags, sizeof(dp) + len, gfp);
738 retval = sctp_make_data(asoc, flags, chunk_len, gfp);
739 if (!retval) 743 if (!retval)
740 goto nodata; 744 return NULL;
741 745
742 retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); 746 retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
743 memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); 747 memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
744 748
745nodata:
746 return retval; 749 return retval;
747} 750}
748 751
@@ -1273,7 +1276,6 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1273 struct sctp_authhdr auth_hdr; 1276 struct sctp_authhdr auth_hdr;
1274 struct sctp_hmac *hmac_desc; 1277 struct sctp_hmac *hmac_desc;
1275 struct sctp_chunk *retval; 1278 struct sctp_chunk *retval;
1276 __u8 *hmac;
1277 1279
1278 /* Get the first hmac that the peer told us to use */ 1280 /* Get the first hmac that the peer told us to use */
1279 hmac_desc = sctp_auth_asoc_get_hmac(asoc); 1281 hmac_desc = sctp_auth_asoc_get_hmac(asoc);
@@ -1292,7 +1294,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1292 retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), 1294 retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
1293 &auth_hdr); 1295 &auth_hdr);
1294 1296
1295 hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len); 1297 skb_put_zero(retval->skb, hmac_desc->hmac_len);
1296 1298
1297 /* Adjust the chunk header to include the empty MAC */ 1299 /* Adjust the chunk header to include the empty MAC */
1298 retval->chunk_hdr->length = 1300 retval->chunk_hdr->length =
@@ -1378,9 +1380,14 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1378 struct sctp_chunk *retval; 1380 struct sctp_chunk *retval;
1379 struct sk_buff *skb; 1381 struct sk_buff *skb;
1380 struct sock *sk; 1382 struct sock *sk;
1383 int chunklen;
1384
1385 chunklen = SCTP_PAD4(sizeof(*chunk_hdr) + paylen);
1386 if (chunklen > SCTP_MAX_CHUNK_LEN)
1387 goto nodata;
1381 1388
1382 /* No need to allocate LL here, as this is only a chunk. */ 1389 /* No need to allocate LL here, as this is only a chunk. */
1383 skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp); 1390 skb = alloc_skb(chunklen, gfp);
1384 if (!skb) 1391 if (!skb)
1385 goto nodata; 1392 goto nodata;
1386 1393
@@ -1415,6 +1422,12 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
1415 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp); 1422 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp);
1416} 1423}
1417 1424
1425struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc,
1426 __u8 flags, int paylen, gfp_t gfp)
1427{
1428 return _sctp_make_chunk(asoc, SCTP_CID_I_DATA, flags, paylen, gfp);
1429}
1430
1418static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, 1431static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
1419 __u8 type, __u8 flags, int paylen, 1432 __u8 type, __u8 flags, int paylen,
1420 gfp_t gfp) 1433 gfp_t gfp)
@@ -2032,6 +2045,10 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
2032 if (net->sctp.addip_enable) 2045 if (net->sctp.addip_enable)
2033 asoc->peer.asconf_capable = 1; 2046 asoc->peer.asconf_capable = 1;
2034 break; 2047 break;
2048 case SCTP_CID_I_DATA:
2049 if (sctp_sk(asoc->base.sk)->strm_interleave)
2050 asoc->intl_enable = 1;
2051 break;
2035 default: 2052 default:
2036 break; 2053 break;
2037 } 2054 }
@@ -3523,6 +3540,30 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3523 return retval; 3540 return retval;
3524} 3541}
3525 3542
3543struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc,
3544 __u32 new_cum_tsn, size_t nstreams,
3545 struct sctp_ifwdtsn_skip *skiplist)
3546{
3547 struct sctp_chunk *retval = NULL;
3548 struct sctp_ifwdtsn_hdr ftsn_hdr;
3549 size_t hint;
3550
3551 hint = (nstreams + 1) * sizeof(__u32);
3552
3553 retval = sctp_make_control(asoc, SCTP_CID_I_FWD_TSN, 0, hint,
3554 GFP_ATOMIC);
3555 if (!retval)
3556 return NULL;
3557
3558 ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
3559 retval->subh.ifwdtsn_hdr =
3560 sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
3561
3562 sctp_addto_chunk(retval, nstreams * sizeof(skiplist[0]), skiplist);
3563
3564 return retval;
3565}
3566
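
sctp_make_ifwdtsn() is the I-FORWARD-TSN counterpart of sctp_make_fwdtsn() above. Its skip entries are twice as wide as FORWARD-TSN's 4-byte (stream, ssn) pairs, since they carry a flags byte and a 32-bit message identifier instead of a 16-bit SSN. The wire layout as this series defines it (field names assumed from the matching include/linux/sctp.h header):

struct sctp_ifwdtsn_skip {
        __be16 stream;          /* stream identifier */
        __u8 reserved;
        __u8 flags;             /* e.g. whether the skipped MID was unordered */
        __be32 mid;             /* message identifier */
};
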
3526/* RE-CONFIG 3.1 (RE-CONFIG chunk) 3567/* RE-CONFIG 3.1 (RE-CONFIG chunk)
3527 * 0 1 2 3 3568 * 0 1 2 3
3528 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 3569 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index df94d77401e7..b71e7fb0a20a 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -632,7 +632,7 @@ static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
632 struct sctp_chunk *abort; 632 struct sctp_chunk *abort;
633 633
634 /* Cancel any partial delivery in progress. */ 634 /* Cancel any partial delivery in progress. */
635 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); 635 asoc->stream.si->abort_pd(&asoc->ulpq, GFP_ATOMIC);
636 636
637 if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT) 637 if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
638 event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST, 638 event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
@@ -878,12 +878,12 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
878 * successfully completed a connect() call. 878 * successfully completed a connect() call.
879 */ 879 */
880 if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) 880 if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
881 sk->sk_state = SCTP_SS_ESTABLISHED; 881 inet_sk_set_state(sk, SCTP_SS_ESTABLISHED);
882 882
883 /* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */ 883 /* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */
884 if (sctp_state(asoc, SHUTDOWN_RECEIVED) && 884 if (sctp_state(asoc, SHUTDOWN_RECEIVED) &&
885 sctp_sstate(sk, ESTABLISHED)) { 885 sctp_sstate(sk, ESTABLISHED)) {
886 sk->sk_state = SCTP_SS_CLOSING; 886 inet_sk_set_state(sk, SCTP_SS_CLOSING);
887 sk->sk_shutdown |= RCV_SHUTDOWN; 887 sk->sk_shutdown |= RCV_SHUTDOWN;
888 } 888 }
889 } 889 }
@@ -972,7 +972,7 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
972 if (!ev) 972 if (!ev)
973 return; 973 return;
974 974
975 sctp_ulpq_tail_event(&asoc->ulpq, ev); 975 asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
976 976
977 switch (err_hdr->cause) { 977 switch (err_hdr->cause) {
978 case SCTP_ERROR_UNKNOWN_CHUNK: 978 case SCTP_ERROR_UNKNOWN_CHUNK:
@@ -1007,18 +1007,6 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
1007 } 1007 }
1008} 1008}
1009 1009
1010/* Process variable FWDTSN chunk information. */
1011static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
1012 struct sctp_chunk *chunk)
1013{
1014 struct sctp_fwdtsn_skip *skip;
1015
1016 /* Walk through all the skipped SSNs */
1017 sctp_walk_fwdtsn(skip, chunk) {
1018 sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
1019 }
1020}
1021
1022/* Helper function to remove the association non-primary peer 1010/* Helper function to remove the association non-primary peer
1023 * transports. 1011 * transports.
1024 */ 1012 */
@@ -1058,7 +1046,7 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
1058 asoc->c.sinit_max_instreams, 1046 asoc->c.sinit_max_instreams,
1059 NULL, GFP_ATOMIC); 1047 NULL, GFP_ATOMIC);
1060 if (ev) 1048 if (ev)
1061 sctp_ulpq_tail_event(&asoc->ulpq, ev); 1049 asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
1062} 1050}
1063 1051
1064/* Helper function to generate an adaptation indication event */ 1052/* Helper function to generate an adaptation indication event */
@@ -1070,7 +1058,7 @@ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
1070 ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); 1058 ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
1071 1059
1072 if (ev) 1060 if (ev)
1073 sctp_ulpq_tail_event(&asoc->ulpq, ev); 1061 asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
1074} 1062}
1075 1063
1076 1064
@@ -1368,18 +1356,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
1368 break; 1356 break;
1369 1357
1370 case SCTP_CMD_REPORT_FWDTSN: 1358 case SCTP_CMD_REPORT_FWDTSN:
1371 /* Move the Cumulattive TSN Ack ahead. */ 1359 asoc->stream.si->report_ftsn(&asoc->ulpq, cmd->obj.u32);
1372 sctp_tsnmap_skip(&asoc->peer.tsn_map, cmd->obj.u32);
1373
1374 /* purge the fragmentation queue */
1375 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, cmd->obj.u32);
1376
1377 /* Abort any in progress partial delivery. */
1378 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
1379 break; 1360 break;
1380 1361
1381 case SCTP_CMD_PROCESS_FWDTSN: 1362 case SCTP_CMD_PROCESS_FWDTSN:
1382 sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk); 1363 asoc->stream.si->handle_ftsn(&asoc->ulpq,
1364 cmd->obj.chunk);
1383 break; 1365 break;
1384 1366
1385 case SCTP_CMD_GEN_SACK: 1367 case SCTP_CMD_GEN_SACK:
@@ -1483,8 +1465,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
1483 pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n", 1465 pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n",
1484 __func__, cmd->obj.chunk, &asoc->ulpq); 1466 __func__, cmd->obj.chunk, &asoc->ulpq);
1485 1467
1486 sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, 1468 asoc->stream.si->ulpevent_data(&asoc->ulpq,
1487 GFP_ATOMIC); 1469 cmd->obj.chunk,
1470 GFP_ATOMIC);
1488 break; 1471 break;
1489 1472
1490 case SCTP_CMD_EVENT_ULP: 1473 case SCTP_CMD_EVENT_ULP:
@@ -1492,7 +1475,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
1492 pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n", 1475 pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n",
1493 __func__, cmd->obj.ulpevent, &asoc->ulpq); 1476 __func__, cmd->obj.ulpevent, &asoc->ulpq);
1494 1477
1495 sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); 1478 asoc->stream.si->enqueue_event(&asoc->ulpq,
1479 cmd->obj.ulpevent);
1496 break; 1480 break;
1497 1481
1498 case SCTP_CMD_REPLY: 1482 case SCTP_CMD_REPLY:
@@ -1729,12 +1713,13 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
1729 break; 1713 break;
1730 1714
1731 case SCTP_CMD_PART_DELIVER: 1715 case SCTP_CMD_PART_DELIVER:
1732 sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC); 1716 asoc->stream.si->start_pd(&asoc->ulpq, GFP_ATOMIC);
1733 break; 1717 break;
1734 1718
1735 case SCTP_CMD_RENEGE: 1719 case SCTP_CMD_RENEGE:
1736 sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk, 1720 asoc->stream.si->renege_events(&asoc->ulpq,
1737 GFP_ATOMIC); 1721 cmd->obj.chunk,
1722 GFP_ATOMIC);
1738 break; 1723 break;
1739 1724
1740 case SCTP_CMD_SETUP_T4: 1725 case SCTP_CMD_SETUP_T4:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8f8ccded13e4..eb7905ffe5f2 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,6 +59,9 @@
59#include <net/sctp/sm.h> 59#include <net/sctp/sm.h>
60#include <net/sctp/structs.h> 60#include <net/sctp/structs.h>
61 61
62#define CREATE_TRACE_POINTS
63#include <trace/events/sctp.h>
64
62static struct sctp_packet *sctp_abort_pkt_new( 65static struct sctp_packet *sctp_abort_pkt_new(
63 struct net *net, 66 struct net *net,
64 const struct sctp_endpoint *ep, 67 const struct sctp_endpoint *ep,
@@ -3013,7 +3016,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
3013 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); 3016 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
3014 } 3017 }
3015 3018
3016 if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) 3019 if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream)))
3017 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, 3020 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
3018 commands); 3021 commands);
3019 3022
@@ -3034,7 +3037,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
3034 case SCTP_IERROR_PROTO_VIOLATION: 3037 case SCTP_IERROR_PROTO_VIOLATION:
3035 return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, 3038 return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
3036 (u8 *)chunk->subh.data_hdr, 3039 (u8 *)chunk->subh.data_hdr,
3037 sizeof(struct sctp_datahdr)); 3040 sctp_datahdr_len(&asoc->stream));
3038 default: 3041 default:
3039 BUG(); 3042 BUG();
3040 } 3043 }
@@ -3133,7 +3136,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4(
3133 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); 3136 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
3134 } 3137 }
3135 3138
3136 if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) 3139 if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream)))
3137 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, 3140 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
3138 commands); 3141 commands);
3139 3142
@@ -3150,7 +3153,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4(
3150 case SCTP_IERROR_PROTO_VIOLATION: 3153 case SCTP_IERROR_PROTO_VIOLATION:
3151 return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, 3154 return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
3152 (u8 *)chunk->subh.data_hdr, 3155 (u8 *)chunk->subh.data_hdr,
3153 sizeof(struct sctp_datahdr)); 3156 sctp_datahdr_len(&asoc->stream));
3154 default: 3157 default:
3155 BUG(); 3158 BUG();
3156 } 3159 }
@@ -3219,6 +3222,8 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
3219 struct sctp_sackhdr *sackh; 3222 struct sctp_sackhdr *sackh;
3220 __u32 ctsn; 3223 __u32 ctsn;
3221 3224
3225 trace_sctp_probe(ep, asoc, chunk);
3226
3222 if (!sctp_vtag_verify(chunk, asoc)) 3227 if (!sctp_vtag_verify(chunk, asoc))
3223 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); 3228 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
3224 3229
@@ -3957,7 +3962,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
3957{ 3962{
3958 struct sctp_fwdtsn_hdr *fwdtsn_hdr; 3963 struct sctp_fwdtsn_hdr *fwdtsn_hdr;
3959 struct sctp_chunk *chunk = arg; 3964 struct sctp_chunk *chunk = arg;
3960 struct sctp_fwdtsn_skip *skip;
3961 __u16 len; 3965 __u16 len;
3962 __u32 tsn; 3966 __u32 tsn;
3963 3967
@@ -3971,7 +3975,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
3971 return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); 3975 return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
3972 3976
3973 /* Make sure that the FORWARD_TSN chunk has valid length. */ 3977 /* Make sure that the FORWARD_TSN chunk has valid length. */
3974 if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) 3978 if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream)))
3975 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, 3979 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
3976 commands); 3980 commands);
3977 3981
@@ -3990,14 +3994,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
3990 if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0) 3994 if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
3991 goto discard_noforce; 3995 goto discard_noforce;
3992 3996
3993 /* Silently discard the chunk if stream-id is not valid */ 3997 if (!asoc->stream.si->validate_ftsn(chunk))
3994 sctp_walk_fwdtsn(skip, chunk) { 3998 goto discard_noforce;
3995 if (ntohs(skip->stream) >= asoc->stream.incnt)
3996 goto discard_noforce;
3997 }
3998 3999
3999 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn)); 4000 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
4000 if (len > sizeof(struct sctp_fwdtsn_hdr)) 4001 if (len > sctp_ftsnhdr_len(&asoc->stream))
4001 sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, 4002 sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN,
4002 SCTP_CHUNK(chunk)); 4003 SCTP_CHUNK(chunk));
4003 4004
@@ -4028,7 +4029,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
4028{ 4029{
4029 struct sctp_fwdtsn_hdr *fwdtsn_hdr; 4030 struct sctp_fwdtsn_hdr *fwdtsn_hdr;
4030 struct sctp_chunk *chunk = arg; 4031 struct sctp_chunk *chunk = arg;
4031 struct sctp_fwdtsn_skip *skip;
4032 __u16 len; 4032 __u16 len;
4033 __u32 tsn; 4033 __u32 tsn;
4034 4034
@@ -4042,7 +4042,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
4042 return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); 4042 return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
4043 4043
4044 /* Make sure that the FORWARD_TSN chunk has a valid length. */ 4044 /* Make sure that the FORWARD_TSN chunk has a valid length. */
4045 if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) 4045 if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream)))
4046 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, 4046 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
4047 commands); 4047 commands);
4048 4048
@@ -4061,14 +4061,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
4061 if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0) 4061 if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
4062 goto gen_shutdown; 4062 goto gen_shutdown;
4063 4063
4064 /* Silently discard the chunk if stream-id is not valid */ 4064 if (!asoc->stream.si->validate_ftsn(chunk))
4065 sctp_walk_fwdtsn(skip, chunk) { 4065 goto gen_shutdown;
4066 if (ntohs(skip->stream) >= asoc->stream.incnt)
4067 goto gen_shutdown;
4068 }
4069 4066
4070 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn)); 4067 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
4071 if (len > sizeof(struct sctp_fwdtsn_hdr)) 4068 if (len > sctp_ftsnhdr_len(&asoc->stream))
4072 sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, 4069 sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN,
4073 SCTP_CHUNK(chunk)); 4070 SCTP_CHUNK(chunk));
4074 4071
@@ -6244,14 +6241,12 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6244 struct sctp_chunk *err; 6241 struct sctp_chunk *err;
6245 enum sctp_verb deliver; 6242 enum sctp_verb deliver;
6246 size_t datalen; 6243 size_t datalen;
6247 u8 ordered = 0;
6248 u16 ssn, sid;
6249 __u32 tsn; 6244 __u32 tsn;
6250 int tmp; 6245 int tmp;
6251 6246
6252 data_hdr = (struct sctp_datahdr *)chunk->skb->data; 6247 data_hdr = (struct sctp_datahdr *)chunk->skb->data;
6253 chunk->subh.data_hdr = data_hdr; 6248 chunk->subh.data_hdr = data_hdr;
6254 skb_pull(chunk->skb, sizeof(*data_hdr)); 6249 skb_pull(chunk->skb, sctp_datahdr_len(&asoc->stream));
6255 6250
6256 tsn = ntohl(data_hdr->tsn); 6251 tsn = ntohl(data_hdr->tsn);
6257 pr_debug("%s: TSN 0x%x\n", __func__, tsn); 6252 pr_debug("%s: TSN 0x%x\n", __func__, tsn);
@@ -6299,7 +6294,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6299 * Actually, allow a little bit of overflow (up to a MTU). 6294 * Actually, allow a little bit of overflow (up to a MTU).
6300 */ 6295 */
6301 datalen = ntohs(chunk->chunk_hdr->length); 6296 datalen = ntohs(chunk->chunk_hdr->length);
6302 datalen -= sizeof(struct sctp_data_chunk); 6297 datalen -= sctp_datachk_len(&asoc->stream);
6303 6298
6304 deliver = SCTP_CMD_CHUNK_ULP; 6299 deliver = SCTP_CMD_CHUNK_ULP;
6305 6300
@@ -6394,7 +6389,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6394 SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS); 6389 SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
6395 if (chunk->asoc) 6390 if (chunk->asoc)
6396 chunk->asoc->stats.iodchunks++; 6391 chunk->asoc->stats.iodchunks++;
6397 ordered = 1;
6398 } 6392 }
6399 6393
6400 /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number 6394 /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number
@@ -6405,8 +6399,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6405 * with cause set to "Invalid Stream Identifier" (See Section 3.3.10) 6399 * with cause set to "Invalid Stream Identifier" (See Section 3.3.10)
6406 * and discard the DATA chunk. 6400 * and discard the DATA chunk.
6407 */ 6401 */
6408 sid = ntohs(data_hdr->stream); 6402 if (ntohs(data_hdr->stream) >= asoc->stream.incnt) {
6409 if (sid >= asoc->stream.incnt) {
6410 /* Mark tsn as received even though we drop it */ 6403 /* Mark tsn as received even though we drop it */
6411 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); 6404 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
6412 6405
@@ -6427,8 +6420,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6427 * SSN is smaller than the next expected one. If it is, it wrapped 6420 * SSN is smaller than the next expected one. If it is, it wrapped
6428 * and is invalid. 6421 * and is invalid.
6429 */ 6422 */
6430 ssn = ntohs(data_hdr->ssn); 6423 if (!asoc->stream.si->validate_data(chunk))
6431 if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid)))
6432 return SCTP_IERROR_PROTO_VIOLATION; 6424 return SCTP_IERROR_PROTO_VIOLATION;
6433 6425
6434 /* Send the data up to the user. Note: Schedule the 6426 /* Send the data up to the user. Note: Schedule the
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 79b6bee5b768..691d9dc620e3 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -985,11 +985,14 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
985 if (state > SCTP_STATE_MAX) 985 if (state > SCTP_STATE_MAX)
986 return &bug; 986 return &bug;
987 987
988 if (cid == SCTP_CID_I_DATA)
989 cid = SCTP_CID_DATA;
990
988 if (cid <= SCTP_CID_BASE_MAX) 991 if (cid <= SCTP_CID_BASE_MAX)
989 return &chunk_event_table[cid][state]; 992 return &chunk_event_table[cid][state];
990 993
991 if (net->sctp.prsctp_enable) { 994 if (net->sctp.prsctp_enable) {
992 if (cid == SCTP_CID_FWD_TSN) 995 if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN)
993 return &prsctp_chunk_event_table[0][state]; 996 return &prsctp_chunk_event_table[0][state];
994 } 997 }
995 998
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 039fcb618c34..bf271f8c2dc9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -201,6 +201,22 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
201 cb(chunk); 201 cb(chunk);
202} 202}
203 203
204static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk,
205 void (*cb)(struct sk_buff *, struct sock *))
206
207{
208 struct sk_buff *skb, *tmp;
209
210 sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp)
211 cb(skb, sk);
212
213 sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp)
214 cb(skb, sk);
215
216 sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp)
217 cb(skb, sk);
218}
219
204/* Verify that this is a valid address. */ 220/* Verify that this is a valid address. */
205static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, 221static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
206 int len) 222 int len)
@@ -968,13 +984,6 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
968 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() 984 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
969 * from userspace. 985 * from userspace.
970 * 986 *
971 * We don't use copy_from_user() for optimization: we first do the
972 * sanity checks (buffer size -fast- and access check-healthy
973 * pointer); if all of those succeed, then we can alloc the memory
974 * (expensive operation) needed to copy the data to kernel. Then we do
975 * the copying without checking the user space area
976 * (__copy_from_user()).
977 *
978 * On exit there is no need to do sockfd_put(), sys_setsockopt() does 987 * On exit there is no need to do sockfd_put(), sys_setsockopt() does
979 * it. 988 * it.
980 * 989 *
@@ -1004,25 +1013,15 @@ static int sctp_setsockopt_bindx(struct sock *sk,
1004 if (unlikely(addrs_size <= 0)) 1013 if (unlikely(addrs_size <= 0))
1005 return -EINVAL; 1014 return -EINVAL;
1006 1015
1007 /* Check the user passed a healthy pointer. */ 1016 kaddrs = vmemdup_user(addrs, addrs_size);
1008 if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) 1017 if (unlikely(IS_ERR(kaddrs)))
1009 return -EFAULT; 1018 return PTR_ERR(kaddrs);
1010
1011 /* Alloc space for the address array in kernel memory. */
1012 kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
1013 if (unlikely(!kaddrs))
1014 return -ENOMEM;
1015
1016 if (__copy_from_user(kaddrs, addrs, addrs_size)) {
1017 kfree(kaddrs);
1018 return -EFAULT;
1019 }
1020 1019
1021 /* Walk through the addrs buffer and count the number of addresses. */ 1020 /* Walk through the addrs buffer and count the number of addresses. */
1022 addr_buf = kaddrs; 1021 addr_buf = kaddrs;
1023 while (walk_size < addrs_size) { 1022 while (walk_size < addrs_size) {
1024 if (walk_size + sizeof(sa_family_t) > addrs_size) { 1023 if (walk_size + sizeof(sa_family_t) > addrs_size) {
1025 kfree(kaddrs); 1024 kvfree(kaddrs);
1026 return -EINVAL; 1025 return -EINVAL;
1027 } 1026 }
1028 1027
@@ -1033,7 +1032,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
1033 * causes the address buffer to overflow return EINVAL. 1032 * causes the address buffer to overflow return EINVAL.
1034 */ 1033 */
1035 if (!af || (walk_size + af->sockaddr_len) > addrs_size) { 1034 if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
1036 kfree(kaddrs); 1035 kvfree(kaddrs);
1037 return -EINVAL; 1036 return -EINVAL;
1038 } 1037 }
1039 addrcnt++; 1038 addrcnt++;
@@ -1063,7 +1062,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
1063 } 1062 }
1064 1063
1065out: 1064out:
1066 kfree(kaddrs); 1065 kvfree(kaddrs);
1067 1066
1068 return err; 1067 return err;
1069} 1068}
@@ -1321,13 +1320,6 @@ out_free:
  * land and invoking either sctp_connectx(). This is used for tunneling
  * the sctp_connectx() request through sctp_setsockopt() from userspace.
  *
- * We don't use copy_from_user() for optimization: we first do the
- * sanity checks (buffer size -fast- and access check-healthy
- * pointer); if all of those succeed, then we can alloc the memory
- * (expensive operation) needed to copy the data to kernel. Then we do
- * the copying without checking the user space area
- * (__copy_from_user()).
- *
  * On exit there is no need to do sockfd_put(), sys_setsockopt() does
  * it.
  *
@@ -1343,7 +1335,6 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
 				      sctp_assoc_t *assoc_id)
 {
 	struct sockaddr *kaddrs;
-	gfp_t gfp = GFP_KERNEL;
 	int err = 0;
 
 	pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
@@ -1352,24 +1343,12 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
 	if (unlikely(addrs_size <= 0))
 		return -EINVAL;
 
-	/* Check the user passed a healthy pointer. */
-	if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
-		return -EFAULT;
-
-	/* Alloc space for the address array in kernel memory. */
-	if (sk->sk_socket->file)
-		gfp = GFP_USER | __GFP_NOWARN;
-	kaddrs = kmalloc(addrs_size, gfp);
-	if (unlikely(!kaddrs))
-		return -ENOMEM;
-
-	if (__copy_from_user(kaddrs, addrs, addrs_size)) {
-		err = -EFAULT;
-	} else {
-		err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
-	}
+	kaddrs = vmemdup_user(addrs, addrs_size);
+	if (unlikely(IS_ERR(kaddrs)))
+		return PTR_ERR(kaddrs);
 
-	kfree(kaddrs);
+	err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+	kvfree(kaddrs);
 
 	return err;
 }
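/*
 * Note on the conversion above: vmemdup_user() copies the user buffer
 * with kvmalloc(), which may fall back to vmalloc() for large sizes,
 * so every former kfree(kaddrs) must become kvfree(kaddrs).  A minimal
 * sketch of the pattern (illustrative, not part of the patch):
 *
 *	kaddrs = vmemdup_user(addrs, addrs_size);
 *	if (IS_ERR(kaddrs))
 *		return PTR_ERR(kaddrs);
 *	...
 *	kvfree(kaddrs);
 */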
@@ -1526,7 +1505,7 @@ static void sctp_close(struct sock *sk, long timeout)
 
 	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 	sk->sk_shutdown = SHUTDOWN_MASK;
-	sk->sk_state = SCTP_SS_CLOSING;
+	inet_sk_set_state(sk, SCTP_SS_CLOSING);
 
 	ep = sctp_sk(sk)->ep;
 
@@ -1552,6 +1531,7 @@ static void sctp_close(struct sock *sk, long timeout)
 
 		if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
 		    !skb_queue_empty(&asoc->ulpq.reasm) ||
+		    !skb_queue_empty(&asoc->ulpq.reasm_uo) ||
 		    (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
 			struct sctp_chunk *chunk;
 
@@ -2006,7 +1986,20 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		if (err < 0)
 			goto out_free;
 
-		wait_connect = true;
+		/* If stream interleave is enabled, wait_connect has to be
+		 * done earlier than data enqueue, as it needs to make data
+		 * or idata according to asoc->intl_enable which is set
+		 * after connection is done.
+		 */
+		if (sctp_sk(asoc->base.sk)->strm_interleave) {
+			timeo = sock_sndtimeo(sk, 0);
+			err = sctp_wait_for_connect(asoc, &timeo);
+			if (err)
+				goto out_unlock;
+		} else {
+			wait_connect = true;
+		}
+
 		pr_debug("%s: we associated primitively\n", __func__);
 	}
 
@@ -2285,7 +2278,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 		if (!event)
 			return -ENOMEM;
 
-		sctp_ulpq_tail_event(&asoc->ulpq, event);
+		asoc->stream.si->enqueue_event(&asoc->ulpq, event);
 	}
 }
 
@@ -3184,7 +3177,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 	if (val == 0) {
 		val = asoc->pathmtu - sp->pf->af->net_header_len;
 		val -= sizeof(struct sctphdr) +
-		       sizeof(struct sctp_data_chunk);
+		       sctp_datachk_len(&asoc->stream);
 	}
 	asoc->user_frag = val;
 	asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
@@ -3354,7 +3347,10 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk,
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
-	sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+	sctp_sk(sk)->frag_interleave = !!val;
+
+	if (!sctp_sk(sk)->frag_interleave)
+		sctp_sk(sk)->strm_interleave = 0;
 
 	return 0;
 }
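/*
 * The extra clearing above ties the two options together: user message
 * interleaving (strm_interleave, the RFC 8260 I-DATA extension) only
 * works while fragment interleaving is on, so turning
 * SCTP_FRAGMENT_INTERLEAVE off also turns the socket's
 * SCTP_INTERLEAVING_SUPPORTED setting back off.
 */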
@@ -4037,6 +4033,40 @@ out:
 	return retval;
 }
 
+static int sctp_setsockopt_interleaving_supported(struct sock *sk,
+						  char __user *optval,
+						  unsigned int optlen)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct net *net = sock_net(sk);
+	struct sctp_assoc_value params;
+	int retval = -EINVAL;
+
+	if (optlen < sizeof(params))
+		goto out;
+
+	optlen = sizeof(params);
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	if (params.assoc_id)
+		goto out;
+
+	if (!net->sctp.intl_enable || !sp->frag_interleave) {
+		retval = -EPERM;
+		goto out;
+	}
+
+	sp->strm_interleave = !!params.assoc_value;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
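/*
 * Illustrative usage (not from the patch): with net.sctp.intl_enable
 * set and fragment interleaving already enabled on the socket, user
 * message interleaving is requested socket-wide before associating:
 *
 *	struct sctp_assoc_value av = { .assoc_id = 0, .assoc_value = 1 };
 *
 *	if (setsockopt(fd, IPPROTO_SCTP, SCTP_INTERLEAVING_SUPPORTED,
 *		       &av, sizeof(av)) < 0)
 *		perror("SCTP_INTERLEAVING_SUPPORTED");
 *
 * A non-zero assoc_id fails with EINVAL and a missing prerequisite
 * with EPERM, per the checks above.
 */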
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4224,6 +4254,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_STREAM_SCHEDULER_VALUE:
 		retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
 		break;
+	case SCTP_INTERLEAVING_SUPPORTED:
+		retval = sctp_setsockopt_interleaving_supported(sk, optval,
+								optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -4600,7 +4634,7 @@ static void sctp_shutdown(struct sock *sk, int how)
 	if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
 		struct sctp_association *asoc;
 
-		sk->sk_state = SCTP_SS_CLOSING;
+		inet_sk_set_state(sk, SCTP_SS_CLOSING);
 		asoc = list_entry(ep->asocs.next,
 				  struct sctp_association, asocs);
 		sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@ -4694,20 +4728,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
 
 /* use callback to avoid exporting the core structure */
-int sctp_transport_walk_start(struct rhashtable_iter *iter)
+void sctp_transport_walk_start(struct rhashtable_iter *iter)
 {
-	int err;
-
 	rhltable_walk_enter(&sctp_transport_hashtable, iter);
 
-	err = rhashtable_walk_start(iter);
-	if (err && err != -EAGAIN) {
-		rhashtable_walk_stop(iter);
-		rhashtable_walk_exit(iter);
-		return err;
-	}
-
-	return 0;
+	rhashtable_walk_start(iter);
 }
 
 void sctp_transport_walk_stop(struct rhashtable_iter *iter)
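/*
 * The signature change above tracks the rhashtable API rework:
 * rhashtable_walk_start() was changed to return void (a stale walk now
 * surfaces as -EAGAIN from rhashtable_walk_next() instead), so starting
 * a transport walk can no longer fail and the caller below drops its
 * error handling accordingly.
 */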
@@ -4801,9 +4826,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
 	int ret;
 
 again:
-	ret = sctp_transport_walk_start(&hti);
-	if (ret)
-		return ret;
+	ret = 0;
+	sctp_transport_walk_start(&hti);
 
 	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
 	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
@@ -5029,7 +5053,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
 	len = sizeof(int);
 	if (put_user(len, optlen))
 		return -EFAULT;
-	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
+	if (put_user(sctp_sk(sk)->autoclose, (int __user *)optval))
 		return -EFAULT;
 	return 0;
 }
@@ -7002,6 +7026,47 @@ out:
 	return retval;
 }
 
+static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len,
+						  char __user *optval,
+						  int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (asoc) {
+		params.assoc_value = asoc->intl_enable;
+	} else if (!params.assoc_id) {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		params.assoc_value = sp->strm_interleave;
+	} else {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -7192,6 +7257,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_scheduler_value(sk, len, optval,
 							 optlen);
 		break;
+	case SCTP_INTERLEAVING_SUPPORTED:
+		retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
+								optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7426,13 +7495,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
 	 * sockets.
 	 *
 	 */
-	sk->sk_state = SCTP_SS_LISTENING;
+	inet_sk_set_state(sk, SCTP_SS_LISTENING);
 	if (!ep->base.bind_addr.port) {
 		if (sctp_autobind(sk))
 			return -EAGAIN;
 	} else {
 		if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
-			sk->sk_state = SCTP_SS_CLOSED;
+			inet_sk_set_state(sk, SCTP_SS_CLOSED);
 			return -EADDRINUSE;
 		}
 	}
@@ -7518,11 +7587,11 @@ out:
  * here, again, by modeling the current TCP/UDP code.  We don't have
  * a good way to test with it yet.
  */
-unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct sctp_sock *sp = sctp_sk(sk);
-	unsigned int mask;
+	__poll_t mask;
 
 	poll_wait(file, sk_sleep(sk), wait);
 
@@ -7533,22 +7602,22 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 */
 	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
 		return (!list_empty(&sp->ep->asocs)) ?
-			(POLLIN | POLLRDNORM) : 0;
+			(EPOLLIN | EPOLLRDNORM) : 0;
 
 	mask = 0;
 
 	/* Is there any exceptional events?  */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR |
-			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+		mask |= EPOLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		mask |= POLLHUP;
+		mask |= EPOLLHUP;
 
 	/* Is it readable?  Reconsider this code with TCP-style support.  */
 	if (!skb_queue_empty(&sk->sk_receive_queue))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* The association is either gone or not ready.  */
 	if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
@@ -7556,7 +7625,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	/* Is it writable?  */
 	if (sctp_writeable(sk)) {
-		mask |= POLLOUT | POLLWRNORM;
+		mask |= EPOLLOUT | EPOLLWRNORM;
 	} else {
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		/*
@@ -7568,7 +7637,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * in the following code to cover it as well.
 	 */
 	if (sctp_writeable(sk))
-		mask |= POLLOUT | POLLWRNORM;
+		mask |= EPOLLOUT | EPOLLWRNORM;
 	}
 	return mask;
 }
@@ -8092,8 +8161,8 @@ void sctp_data_ready(struct sock *sk)
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
 	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-						POLLRDNORM | POLLRDBAND);
+		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+						EPOLLRDNORM | EPOLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	rcu_read_unlock();
 }
@@ -8425,11 +8494,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 
 	}
 
-	sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp)
-		sctp_skb_set_owner_r_frag(skb, newsk);
-
-	sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp)
-		sctp_skb_set_owner_r_frag(skb, newsk);
+	sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
 
 	/* Set the type of socket to indicate that it is peeled off from the
 	 * original UDP-style socket or created with the accept() call on a
@@ -8455,10 +8520,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	 * is called, set RCV_SHUTDOWN flag.
 	 */
 	if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
-		newsk->sk_state = SCTP_SS_CLOSED;
+		inet_sk_set_state(newsk, SCTP_SS_CLOSED);
 		newsk->sk_shutdown |= RCV_SHUTDOWN;
 	} else {
-		newsk->sk_state = SCTP_SS_ESTABLISHED;
+		inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED);
 	}
 
 	release_sock(newsk);
@@ -8487,6 +8552,10 @@ struct proto sctp_prot = {
 	.unhash		= sctp_unhash,
 	.get_port	= sctp_get_port,
 	.obj_size	= sizeof(struct sctp_sock),
+	.useroffset	= offsetof(struct sctp_sock, subscribe),
+	.usersize	= offsetof(struct sctp_sock, initmsg) -
+				offsetof(struct sctp_sock, subscribe) +
+				sizeof_field(struct sctp_sock, initmsg),
 	.sysctl_mem	= sysctl_sctp_mem,
 	.sysctl_rmem	= sysctl_sctp_rmem,
 	.sysctl_wmem	= sysctl_sctp_wmem,
@@ -8526,6 +8595,10 @@ struct proto sctpv6_prot = {
 	.unhash		= sctp_unhash,
 	.get_port	= sctp_get_port,
 	.obj_size	= sizeof(struct sctp6_sock),
+	.useroffset	= offsetof(struct sctp6_sock, sctp.subscribe),
+	.usersize	= offsetof(struct sctp6_sock, sctp.initmsg) -
+				offsetof(struct sctp6_sock, sctp.subscribe) +
+				sizeof_field(struct sctp6_sock, sctp.initmsg),
 	.sysctl_mem	= sysctl_sctp_mem,
 	.sysctl_rmem	= sysctl_sctp_rmem,
 	.sysctl_wmem	= sysctl_sctp_wmem,
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 524dfeb94c41..cedf672487f9 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -167,6 +167,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	sched->init(stream);
 
 in:
+	sctp_stream_interleave_init(stream);
 	if (!incnt)
 		goto out;
 
@@ -213,11 +214,13 @@ void sctp_stream_clear(struct sctp_stream *stream)
 {
 	int i;
 
-	for (i = 0; i < stream->outcnt; i++)
-		stream->out[i].ssn = 0;
+	for (i = 0; i < stream->outcnt; i++) {
+		stream->out[i].mid = 0;
+		stream->out[i].mid_uo = 0;
+	}
 
 	for (i = 0; i < stream->incnt; i++)
-		stream->in[i].ssn = 0;
+		stream->in[i].mid = 0;
 }
 
 void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
@@ -604,10 +607,10 @@ struct sctp_chunk *sctp_process_strreset_outreq(
 		}
 
 		for (i = 0; i < nums; i++)
-			stream->in[ntohs(str_p[i])].ssn = 0;
+			stream->in[ntohs(str_p[i])].mid = 0;
 	} else {
 		for (i = 0; i < stream->incnt; i++)
-			stream->in[i].ssn = 0;
+			stream->in[i].mid = 0;
 	}
 
 	result = SCTP_STRRESET_PERFORMED;
@@ -751,8 +754,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
 	 * performed.
 	 */
 	max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
-	sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
-	sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+	asoc->stream.si->report_ftsn(&asoc->ulpq, max_tsn_seen);
 
 	/* G1: Compute an appropriate value for the Receiver's Next TSN -- the
 	 * TSN that the peer should use to send the next DATA chunk.  The
@@ -781,10 +783,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
 	/* G5:  The next expected and outgoing SSNs MUST be reset to 0 for all
 	 * incoming and outgoing streams.
 	 */
-	for (i = 0; i < stream->outcnt; i++)
-		stream->out[i].ssn = 0;
+	for (i = 0; i < stream->outcnt; i++) {
+		stream->out[i].mid = 0;
+		stream->out[i].mid_uo = 0;
+	}
 	for (i = 0; i < stream->incnt; i++)
-		stream->in[i].ssn = 0;
+		stream->in[i].mid = 0;
 
 	result = SCTP_STRRESET_PERFORMED;
 
@@ -974,11 +978,15 @@ struct sctp_chunk *sctp_process_strreset_resp(
 
 	if (result == SCTP_STRRESET_PERFORMED) {
 		if (nums) {
-			for (i = 0; i < nums; i++)
-				stream->out[ntohs(str_p[i])].ssn = 0;
+			for (i = 0; i < nums; i++) {
+				stream->out[ntohs(str_p[i])].mid = 0;
+				stream->out[ntohs(str_p[i])].mid_uo = 0;
+			}
 		} else {
-			for (i = 0; i < stream->outcnt; i++)
-				stream->out[i].ssn = 0;
+			for (i = 0; i < stream->outcnt; i++) {
+				stream->out[i].mid = 0;
+				stream->out[i].mid_uo = 0;
+			}
 		}
 
 		flags = SCTP_STREAM_RESET_OUTGOING_SSN;
@@ -1021,8 +1029,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 				&asoc->peer.tsn_map);
 		LIST_HEAD(temp);
 
-		sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
-		sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+		asoc->stream.si->report_ftsn(&asoc->ulpq, mtsn);
 
 		sctp_tsnmap_init(&asoc->peer.tsn_map,
 				 SCTP_TSN_MAP_INITIAL,
@@ -1040,10 +1047,12 @@ struct sctp_chunk *sctp_process_strreset_resp(
 		asoc->ctsn_ack_point = asoc->next_tsn - 1;
 		asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
 
-		for (i = 0; i < stream->outcnt; i++)
-			stream->out[i].ssn = 0;
+		for (i = 0; i < stream->outcnt; i++) {
+			stream->out[i].mid = 0;
+			stream->out[i].mid_uo = 0;
+		}
 		for (i = 0; i < stream->incnt; i++)
-			stream->in[i].ssn = 0;
+			stream->in[i].mid = 0;
 	}
 
 	for (i = 0; i < stream->outcnt; i++)
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
new file mode 100644
index 000000000000..8c7cf8f08711
--- /dev/null
+++ b/net/sctp/stream_interleave.c
@@ -0,0 +1,1334 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email address(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Xin Long <lucien.xin@gmail.com>
30 */
31
32#include <net/busy_poll.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/ulpevent.h>
36#include <linux/sctp.h>
37
38static struct sctp_chunk *sctp_make_idatafrag_empty(
39 const struct sctp_association *asoc,
40 const struct sctp_sndrcvinfo *sinfo,
41 int len, __u8 flags, gfp_t gfp)
42{
43 struct sctp_chunk *retval;
44 struct sctp_idatahdr dp;
45
46 memset(&dp, 0, sizeof(dp));
47 dp.stream = htons(sinfo->sinfo_stream);
48
49 if (sinfo->sinfo_flags & SCTP_UNORDERED)
50 flags |= SCTP_DATA_UNORDERED;
51
52 retval = sctp_make_idata(asoc, flags, sizeof(dp) + len, gfp);
53 if (!retval)
54 return NULL;
55
56 retval->subh.idata_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
57 memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
58
59 return retval;
60}
61
62static void sctp_chunk_assign_mid(struct sctp_chunk *chunk)
63{
64 struct sctp_stream *stream;
65 struct sctp_chunk *lchunk;
66 __u32 cfsn = 0;
67 __u16 sid;
68
69 if (chunk->has_mid)
70 return;
71
72 sid = sctp_chunk_stream_no(chunk);
73 stream = &chunk->asoc->stream;
74
75 list_for_each_entry(lchunk, &chunk->msg->chunks, frag_list) {
76 struct sctp_idatahdr *hdr;
77 __u32 mid;
78
79 lchunk->has_mid = 1;
80
81 hdr = lchunk->subh.idata_hdr;
82
83 if (lchunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)
84 hdr->ppid = lchunk->sinfo.sinfo_ppid;
85 else
86 hdr->fsn = htonl(cfsn++);
87
88 if (lchunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
89 mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ?
90 sctp_mid_uo_next(stream, out, sid) :
91 sctp_mid_uo_peek(stream, out, sid);
92 } else {
93 mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ?
94 sctp_mid_next(stream, out, sid) :
95 sctp_mid_peek(stream, out, sid);
96 }
97 hdr->mid = htonl(mid);
98 }
99}
100
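/*
 * Worked example for sctp_chunk_assign_mid() above (illustrative): a
 * message fragmented into three I-DATA chunks on an ordered stream
 * whose next MID is 41 carries mid = 41 on all three fragments; the
 * FIRST_FRAG carries the PPID in place of an FSN, the middle fragment
 * gets fsn = 0 and the LAST_FRAG fsn = 1.  Only the LAST_FRAG advances
 * the stream's counter (sctp_mid_next() vs sctp_mid_peek()), so the
 * next message on that stream starts at MID 42.
 */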
101static bool sctp_validate_data(struct sctp_chunk *chunk)
102{
103 const struct sctp_stream *stream;
104 __u16 sid, ssn;
105
106 if (chunk->chunk_hdr->type != SCTP_CID_DATA)
107 return false;
108
109 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
110 return true;
111
112 stream = &chunk->asoc->stream;
113 sid = sctp_chunk_stream_no(chunk);
114 ssn = ntohs(chunk->subh.data_hdr->ssn);
115
116 return !SSN_lt(ssn, sctp_ssn_peek(stream, in, sid));
117}
118
119static bool sctp_validate_idata(struct sctp_chunk *chunk)
120{
121 struct sctp_stream *stream;
122 __u32 mid;
123 __u16 sid;
124
125 if (chunk->chunk_hdr->type != SCTP_CID_I_DATA)
126 return false;
127
128 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
129 return true;
130
131 stream = &chunk->asoc->stream;
132 sid = sctp_chunk_stream_no(chunk);
133 mid = ntohl(chunk->subh.idata_hdr->mid);
134
135 return !MID_lt(mid, sctp_mid_peek(stream, in, sid));
136}
137
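/*
 * MID_lt() used here is serial number arithmetic over the 32-bit MID
 * space, so comparisons stay correct when the counter wraps.  A
 * minimal sketch of the idea (assuming the usual kernel definition
 * style; the real macro lives in the SCTP headers):
 *
 *	static inline bool mid_lt(__u32 a, __u32 b)
 *	{
 *		return (__s32)(a - b) < 0;
 *	}
 *
 * e.g. mid_lt(0xffffffffU, 0) is true, i.e. MID 0 follows 0xffffffff.
 */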
138static void sctp_intl_store_reasm(struct sctp_ulpq *ulpq,
139 struct sctp_ulpevent *event)
140{
141 struct sctp_ulpevent *cevent;
142 struct sk_buff *pos;
143
144 pos = skb_peek_tail(&ulpq->reasm);
145 if (!pos) {
146 __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
147 return;
148 }
149
150 cevent = sctp_skb2event(pos);
151
152 if (event->stream == cevent->stream &&
153 event->mid == cevent->mid &&
154 (cevent->msg_flags & SCTP_DATA_FIRST_FRAG ||
155 (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) &&
156 event->fsn > cevent->fsn))) {
157 __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
158 return;
159 }
160
161 if ((event->stream == cevent->stream &&
162 MID_lt(cevent->mid, event->mid)) ||
163 event->stream > cevent->stream) {
164 __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
165 return;
166 }
167
168 skb_queue_walk(&ulpq->reasm, pos) {
169 cevent = sctp_skb2event(pos);
170
171 if (event->stream < cevent->stream ||
172 (event->stream == cevent->stream &&
173 MID_lt(event->mid, cevent->mid)))
174 break;
175
176 if (event->stream == cevent->stream &&
177 event->mid == cevent->mid &&
178 !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
179 (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
180 event->fsn < cevent->fsn))
181 break;
182 }
183
184 __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event));
185}
186
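/*
 * sctp_intl_store_reasm() above keeps the reassembly queue sorted by
 * (stream, MID, FSN), with a message's FIRST_FRAG ahead of its other
 * fragments.  The two early returns are fast paths that append at the
 * tail when the new event already sorts last; the full walk runs only
 * otherwise.
 */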
187static struct sctp_ulpevent *sctp_intl_retrieve_partial(
188 struct sctp_ulpq *ulpq,
189 struct sctp_ulpevent *event)
190{
191 struct sk_buff *first_frag = NULL;
192 struct sk_buff *last_frag = NULL;
193 struct sctp_ulpevent *retval;
194 struct sctp_stream_in *sin;
195 struct sk_buff *pos;
196 __u32 next_fsn = 0;
197 int is_last = 0;
198
199 sin = sctp_stream_in(ulpq->asoc, event->stream);
200
201 skb_queue_walk(&ulpq->reasm, pos) {
202 struct sctp_ulpevent *cevent = sctp_skb2event(pos);
203
204 if (cevent->stream < event->stream)
205 continue;
206
207 if (cevent->stream > event->stream ||
208 cevent->mid != sin->mid)
209 break;
210
211 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
212 case SCTP_DATA_FIRST_FRAG:
213 goto out;
214 case SCTP_DATA_MIDDLE_FRAG:
215 if (!first_frag) {
216 if (cevent->fsn == sin->fsn) {
217 first_frag = pos;
218 last_frag = pos;
219 next_fsn = cevent->fsn + 1;
220 }
221 } else if (cevent->fsn == next_fsn) {
222 last_frag = pos;
223 next_fsn++;
224 } else {
225 goto out;
226 }
227 break;
228 case SCTP_DATA_LAST_FRAG:
229 if (!first_frag) {
230 if (cevent->fsn == sin->fsn) {
231 first_frag = pos;
232 last_frag = pos;
233 next_fsn = 0;
234 is_last = 1;
235 }
236 } else if (cevent->fsn == next_fsn) {
237 last_frag = pos;
238 next_fsn = 0;
239 is_last = 1;
240 }
241 goto out;
242 default:
243 goto out;
244 }
245 }
246
247out:
248 if (!first_frag)
249 return NULL;
250
251 retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
252 &ulpq->reasm, first_frag,
253 last_frag);
254 if (retval) {
255 sin->fsn = next_fsn;
256 if (is_last) {
257 retval->msg_flags |= MSG_EOR;
258 sin->pd_mode = 0;
259 }
260 }
261
262 return retval;
263}
264
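/*
 * sctp_intl_retrieve_partial() continues a message already being
 * partially delivered on this stream: it collects consecutive
 * fragments starting at the expected FSN (sin->fsn) of the current
 * MID (sin->mid) and stops at the first gap; reaching the LAST_FRAG
 * ends partial delivery mode and marks the event MSG_EOR.
 */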
265static struct sctp_ulpevent *sctp_intl_retrieve_reassembled(
266 struct sctp_ulpq *ulpq,
267 struct sctp_ulpevent *event)
268{
269 struct sctp_association *asoc = ulpq->asoc;
270 struct sk_buff *pos, *first_frag = NULL;
271 struct sctp_ulpevent *retval = NULL;
272 struct sk_buff *pd_first = NULL;
273 struct sk_buff *pd_last = NULL;
274 struct sctp_stream_in *sin;
275 __u32 next_fsn = 0;
276 __u32 pd_point = 0;
277 __u32 pd_len = 0;
278 __u32 mid = 0;
279
280 sin = sctp_stream_in(ulpq->asoc, event->stream);
281
282 skb_queue_walk(&ulpq->reasm, pos) {
283 struct sctp_ulpevent *cevent = sctp_skb2event(pos);
284
285 if (cevent->stream < event->stream)
286 continue;
287 if (cevent->stream > event->stream)
288 break;
289
290 if (MID_lt(cevent->mid, event->mid))
291 continue;
292 if (MID_lt(event->mid, cevent->mid))
293 break;
294
295 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
296 case SCTP_DATA_FIRST_FRAG:
297 if (cevent->mid == sin->mid) {
298 pd_first = pos;
299 pd_last = pos;
300 pd_len = pos->len;
301 }
302
303 first_frag = pos;
304 next_fsn = 0;
305 mid = cevent->mid;
306 break;
307
308 case SCTP_DATA_MIDDLE_FRAG:
309 if (first_frag && cevent->mid == mid &&
310 cevent->fsn == next_fsn) {
311 next_fsn++;
312 if (pd_first) {
313 pd_last = pos;
314 pd_len += pos->len;
315 }
316 } else {
317 first_frag = NULL;
318 }
319 break;
320
321 case SCTP_DATA_LAST_FRAG:
322 if (first_frag && cevent->mid == mid &&
323 cevent->fsn == next_fsn)
324 goto found;
325 else
326 first_frag = NULL;
327 break;
328 }
329 }
330
331 if (!pd_first)
332 goto out;
333
334 pd_point = sctp_sk(asoc->base.sk)->pd_point;
335 if (pd_point && pd_point <= pd_len) {
336 retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
337 &ulpq->reasm,
338 pd_first, pd_last);
339 if (retval) {
340 sin->fsn = next_fsn;
341 sin->pd_mode = 1;
342 }
343 }
344 goto out;
345
346found:
347 retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
348 &ulpq->reasm,
349 first_frag, pos);
350 if (retval)
351 retval->msg_flags |= MSG_EOR;
352
353out:
354 return retval;
355}
356
357static struct sctp_ulpevent *sctp_intl_reasm(struct sctp_ulpq *ulpq,
358 struct sctp_ulpevent *event)
359{
360 struct sctp_ulpevent *retval = NULL;
361 struct sctp_stream_in *sin;
362
363 if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
364 event->msg_flags |= MSG_EOR;
365 return event;
366 }
367
368 sctp_intl_store_reasm(ulpq, event);
369
370 sin = sctp_stream_in(ulpq->asoc, event->stream);
371 if (sin->pd_mode && event->mid == sin->mid &&
372 event->fsn == sin->fsn)
373 retval = sctp_intl_retrieve_partial(ulpq, event);
374
375 if (!retval)
376 retval = sctp_intl_retrieve_reassembled(ulpq, event);
377
378 return retval;
379}
380
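/*
 * Reassembly entry point above: an unfragmented event is returned
 * immediately with MSG_EOR; otherwise the fragment is filed into the
 * sorted queue, an in-progress partial delivery is continued when this
 * is exactly the fragment it waits for, and failing that a full
 * reassembly (or a new partial delivery once pd_point bytes are
 * queued) is attempted.
 */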
381static void sctp_intl_store_ordered(struct sctp_ulpq *ulpq,
382 struct sctp_ulpevent *event)
383{
384 struct sctp_ulpevent *cevent;
385 struct sk_buff *pos;
386
387 pos = skb_peek_tail(&ulpq->lobby);
388 if (!pos) {
389 __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
390 return;
391 }
392
393 cevent = (struct sctp_ulpevent *)pos->cb;
394 if (event->stream == cevent->stream &&
395 MID_lt(cevent->mid, event->mid)) {
396 __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
397 return;
398 }
399
400 if (event->stream > cevent->stream) {
401 __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
402 return;
403 }
404
405 skb_queue_walk(&ulpq->lobby, pos) {
406 cevent = (struct sctp_ulpevent *)pos->cb;
407
408 if (cevent->stream > event->stream)
409 break;
410
411 if (cevent->stream == event->stream &&
412 MID_lt(event->mid, cevent->mid))
413 break;
414 }
415
416 __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event));
417}
418
419static void sctp_intl_retrieve_ordered(struct sctp_ulpq *ulpq,
420 struct sctp_ulpevent *event)
421{
422 struct sk_buff_head *event_list;
423 struct sctp_stream *stream;
424 struct sk_buff *pos, *tmp;
425 __u16 sid = event->stream;
426
427 stream = &ulpq->asoc->stream;
428 event_list = (struct sk_buff_head *)sctp_event2skb(event)->prev;
429
430 sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
431 struct sctp_ulpevent *cevent = (struct sctp_ulpevent *)pos->cb;
432
433 if (cevent->stream > sid)
434 break;
435
436 if (cevent->stream < sid)
437 continue;
438
439 if (cevent->mid != sctp_mid_peek(stream, in, sid))
440 break;
441
442 sctp_mid_next(stream, in, sid);
443
444 __skb_unlink(pos, &ulpq->lobby);
445
446 __skb_queue_tail(event_list, pos);
447 }
448}
449
450static struct sctp_ulpevent *sctp_intl_order(struct sctp_ulpq *ulpq,
451 struct sctp_ulpevent *event)
452{
453 struct sctp_stream *stream;
454 __u16 sid;
455
456 stream = &ulpq->asoc->stream;
457 sid = event->stream;
458
459 if (event->mid != sctp_mid_peek(stream, in, sid)) {
460 sctp_intl_store_ordered(ulpq, event);
461 return NULL;
462 }
463
464 sctp_mid_next(stream, in, sid);
465
466 sctp_intl_retrieve_ordered(ulpq, event);
467
468 return event;
469}
470
471static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
472 struct sctp_ulpevent *event)
473{
474 struct sk_buff *skb = sctp_event2skb(event);
475 struct sock *sk = ulpq->asoc->base.sk;
476 struct sctp_sock *sp = sctp_sk(sk);
477 struct sk_buff_head *skb_list;
478
479 skb_list = (struct sk_buff_head *)skb->prev;
480
481 if (sk->sk_shutdown & RCV_SHUTDOWN &&
482 (sk->sk_shutdown & SEND_SHUTDOWN ||
483 !sctp_ulpevent_is_notification(event)))
484 goto out_free;
485
486 if (!sctp_ulpevent_is_notification(event)) {
487 sk_mark_napi_id(sk, skb);
488 sk_incoming_cpu_update(sk);
489 }
490
491 if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
492 goto out_free;
493
494 if (skb_list)
495 skb_queue_splice_tail_init(skb_list,
496 &sk->sk_receive_queue);
497 else
498 __skb_queue_tail(&sk->sk_receive_queue, skb);
499
500 if (!sp->data_ready_signalled) {
501 sp->data_ready_signalled = 1;
502 sk->sk_data_ready(sk);
503 }
504
505 return 1;
506
507out_free:
508 if (skb_list)
509 sctp_queue_purge_ulpevents(skb_list);
510 else
511 sctp_ulpevent_free(event);
512
513 return 0;
514}
515
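/*
 * Note on the skb->prev cast above: events handed in here may still
 * sit on a local sk_buff_head holding a batch that became deliverable
 * together; for the first skb on such a list, skb->prev points back at
 * the list head, so the cast recovers the batch and the whole list is
 * spliced onto the socket receive queue in one go.
 */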
516static void sctp_intl_store_reasm_uo(struct sctp_ulpq *ulpq,
517 struct sctp_ulpevent *event)
518{
519 struct sctp_ulpevent *cevent;
520 struct sk_buff *pos;
521
522 pos = skb_peek_tail(&ulpq->reasm_uo);
523 if (!pos) {
524 __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
525 return;
526 }
527
528 cevent = sctp_skb2event(pos);
529
530 if (event->stream == cevent->stream &&
531 event->mid == cevent->mid &&
532 (cevent->msg_flags & SCTP_DATA_FIRST_FRAG ||
533 (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) &&
534 event->fsn > cevent->fsn))) {
535 __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
536 return;
537 }
538
539 if ((event->stream == cevent->stream &&
540 MID_lt(cevent->mid, event->mid)) ||
541 event->stream > cevent->stream) {
542 __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
543 return;
544 }
545
546 skb_queue_walk(&ulpq->reasm_uo, pos) {
547 cevent = sctp_skb2event(pos);
548
549 if (event->stream < cevent->stream ||
550 (event->stream == cevent->stream &&
551 MID_lt(event->mid, cevent->mid)))
552 break;
553
554 if (event->stream == cevent->stream &&
555 event->mid == cevent->mid &&
556 !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
557 (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
558 event->fsn < cevent->fsn))
559 break;
560 }
561
562 __skb_queue_before(&ulpq->reasm_uo, pos, sctp_event2skb(event));
563}
564
565static struct sctp_ulpevent *sctp_intl_retrieve_partial_uo(
566 struct sctp_ulpq *ulpq,
567 struct sctp_ulpevent *event)
568{
569 struct sk_buff *first_frag = NULL;
570 struct sk_buff *last_frag = NULL;
571 struct sctp_ulpevent *retval;
572 struct sctp_stream_in *sin;
573 struct sk_buff *pos;
574 __u32 next_fsn = 0;
575 int is_last = 0;
576
577 sin = sctp_stream_in(ulpq->asoc, event->stream);
578
579 skb_queue_walk(&ulpq->reasm_uo, pos) {
580 struct sctp_ulpevent *cevent = sctp_skb2event(pos);
581
582 if (cevent->stream < event->stream)
583 continue;
584 if (cevent->stream > event->stream)
585 break;
586
587 if (MID_lt(cevent->mid, sin->mid_uo))
588 continue;
589 if (MID_lt(sin->mid_uo, cevent->mid))
590 break;
591
592 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
593 case SCTP_DATA_FIRST_FRAG:
594 goto out;
595 case SCTP_DATA_MIDDLE_FRAG:
596 if (!first_frag) {
597 if (cevent->fsn == sin->fsn_uo) {
598 first_frag = pos;
599 last_frag = pos;
600 next_fsn = cevent->fsn + 1;
601 }
602 } else if (cevent->fsn == next_fsn) {
603 last_frag = pos;
604 next_fsn++;
605 } else {
606 goto out;
607 }
608 break;
609 case SCTP_DATA_LAST_FRAG:
610 if (!first_frag) {
611 if (cevent->fsn == sin->fsn_uo) {
612 first_frag = pos;
613 last_frag = pos;
614 next_fsn = 0;
615 is_last = 1;
616 }
617 } else if (cevent->fsn == next_fsn) {
618 last_frag = pos;
619 next_fsn = 0;
620 is_last = 1;
621 }
622 goto out;
623 default:
624 goto out;
625 }
626 }
627
628out:
629 if (!first_frag)
630 return NULL;
631
632 retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
633 &ulpq->reasm_uo, first_frag,
634 last_frag);
635 if (retval) {
636 sin->fsn_uo = next_fsn;
637 if (is_last) {
638 retval->msg_flags |= MSG_EOR;
639 sin->pd_mode_uo = 0;
640 }
641 }
642
643 return retval;
644}
645
646static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo(
647 struct sctp_ulpq *ulpq,
648 struct sctp_ulpevent *event)
649{
650 struct sctp_association *asoc = ulpq->asoc;
651 struct sk_buff *pos, *first_frag = NULL;
652 struct sctp_ulpevent *retval = NULL;
653 struct sk_buff *pd_first = NULL;
654 struct sk_buff *pd_last = NULL;
655 struct sctp_stream_in *sin;
656 __u32 next_fsn = 0;
657 __u32 pd_point = 0;
658 __u32 pd_len = 0;
659 __u32 mid = 0;
660
661 sin = sctp_stream_in(ulpq->asoc, event->stream);
662
663 skb_queue_walk(&ulpq->reasm_uo, pos) {
664 struct sctp_ulpevent *cevent = sctp_skb2event(pos);
665
666 if (cevent->stream < event->stream)
667 continue;
668 if (cevent->stream > event->stream)
669 break;
670
671 if (MID_lt(cevent->mid, event->mid))
672 continue;
673 if (MID_lt(event->mid, cevent->mid))
674 break;
675
676 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
677 case SCTP_DATA_FIRST_FRAG:
678 if (!sin->pd_mode_uo) {
679 sin->mid_uo = cevent->mid;
680 pd_first = pos;
681 pd_last = pos;
682 pd_len = pos->len;
683 }
684
685 first_frag = pos;
686 next_fsn = 0;
687 mid = cevent->mid;
688 break;
689
690 case SCTP_DATA_MIDDLE_FRAG:
691 if (first_frag && cevent->mid == mid &&
692 cevent->fsn == next_fsn) {
693 next_fsn++;
694 if (pd_first) {
695 pd_last = pos;
696 pd_len += pos->len;
697 }
698 } else {
699 first_frag = NULL;
700 }
701 break;
702
703 case SCTP_DATA_LAST_FRAG:
704 if (first_frag && cevent->mid == mid &&
705 cevent->fsn == next_fsn)
706 goto found;
707 else
708 first_frag = NULL;
709 break;
710 }
711 }
712
713 if (!pd_first)
714 goto out;
715
716 pd_point = sctp_sk(asoc->base.sk)->pd_point;
717 if (pd_point && pd_point <= pd_len) {
718 retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
719 &ulpq->reasm_uo,
720 pd_first, pd_last);
721 if (retval) {
722 sin->fsn_uo = next_fsn;
723 sin->pd_mode_uo = 1;
724 }
725 }
726 goto out;
727
728found:
729 retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
730 &ulpq->reasm_uo,
731 first_frag, pos);
732 if (retval)
733 retval->msg_flags |= MSG_EOR;
734
735out:
736 return retval;
737}
738
739static struct sctp_ulpevent *sctp_intl_reasm_uo(struct sctp_ulpq *ulpq,
740 struct sctp_ulpevent *event)
741{
742 struct sctp_ulpevent *retval = NULL;
743 struct sctp_stream_in *sin;
744
745 if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
746 event->msg_flags |= MSG_EOR;
747 return event;
748 }
749
750 sctp_intl_store_reasm_uo(ulpq, event);
751
752 sin = sctp_stream_in(ulpq->asoc, event->stream);
753 if (sin->pd_mode_uo && event->mid == sin->mid_uo &&
754 event->fsn == sin->fsn_uo)
755 retval = sctp_intl_retrieve_partial_uo(ulpq, event);
756
757 if (!retval)
758 retval = sctp_intl_retrieve_reassembled_uo(ulpq, event);
759
760 return retval;
761}
762
763static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq)
764{
765 struct sctp_stream_in *csin, *sin = NULL;
766 struct sk_buff *first_frag = NULL;
767 struct sk_buff *last_frag = NULL;
768 struct sctp_ulpevent *retval;
769 struct sk_buff *pos;
770 __u32 next_fsn = 0;
771 __u16 sid = 0;
772
773 skb_queue_walk(&ulpq->reasm_uo, pos) {
774 struct sctp_ulpevent *cevent = sctp_skb2event(pos);
775
776 csin = sctp_stream_in(ulpq->asoc, cevent->stream);
777 if (csin->pd_mode_uo)
778 continue;
779
780 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
781 case SCTP_DATA_FIRST_FRAG:
782 if (first_frag)
783 goto out;
784 first_frag = pos;
785 last_frag = pos;
786 next_fsn = 0;
787 sin = csin;
788 sid = cevent->stream;
789 sin->mid_uo = cevent->mid;
790 break;
791 case SCTP_DATA_MIDDLE_FRAG:
792 if (!first_frag)
793 break;
794 if (cevent->stream == sid &&
795 cevent->mid == sin->mid_uo &&
796 cevent->fsn == next_fsn) {
797 next_fsn++;
798 last_frag = pos;
799 } else {
800 goto out;
801 }
802 break;
803 case SCTP_DATA_LAST_FRAG:
804 if (first_frag)
805 goto out;
806 break;
807 default:
808 break;
809 }
810 }
811
812 if (!first_frag)
813 return NULL;
814
815out:
816 retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
817 &ulpq->reasm_uo, first_frag,
818 last_frag);
819 if (retval) {
820 sin->fsn_uo = next_fsn;
821 sin->pd_mode_uo = 1;
822 }
823
824 return retval;
825}
826
827static int sctp_ulpevent_idata(struct sctp_ulpq *ulpq,
828 struct sctp_chunk *chunk, gfp_t gfp)
829{
830 struct sctp_ulpevent *event;
831 struct sk_buff_head temp;
832 int event_eor = 0;
833
834 event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
835 if (!event)
836 return -ENOMEM;
837
838 event->mid = ntohl(chunk->subh.idata_hdr->mid);
839 if (event->msg_flags & SCTP_DATA_FIRST_FRAG)
840 event->ppid = chunk->subh.idata_hdr->ppid;
841 else
842 event->fsn = ntohl(chunk->subh.idata_hdr->fsn);
843
844 if (!(event->msg_flags & SCTP_DATA_UNORDERED)) {
845 event = sctp_intl_reasm(ulpq, event);
846 if (event && event->msg_flags & MSG_EOR) {
847 skb_queue_head_init(&temp);
848 __skb_queue_tail(&temp, sctp_event2skb(event));
849
850 event = sctp_intl_order(ulpq, event);
851 }
852 } else {
853 event = sctp_intl_reasm_uo(ulpq, event);
854 }
855
856 if (event) {
857 event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
858 sctp_enqueue_event(ulpq, event);
859 }
860
861 return event_eor;
862}
863
864static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq)
865{
866 struct sctp_stream_in *csin, *sin = NULL;
867 struct sk_buff *first_frag = NULL;
868 struct sk_buff *last_frag = NULL;
869 struct sctp_ulpevent *retval;
870 struct sk_buff *pos;
871 __u32 next_fsn = 0;
872 __u16 sid = 0;
873
874 skb_queue_walk(&ulpq->reasm, pos) {
875 struct sctp_ulpevent *cevent = sctp_skb2event(pos);
876
877 csin = sctp_stream_in(ulpq->asoc, cevent->stream);
878 if (csin->pd_mode)
879 continue;
880
881 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
882 case SCTP_DATA_FIRST_FRAG:
883 if (first_frag)
884 goto out;
885 if (cevent->mid == csin->mid) {
886 first_frag = pos;
887 last_frag = pos;
888 next_fsn = 0;
889 sin = csin;
890 sid = cevent->stream;
891 }
892 break;
893 case SCTP_DATA_MIDDLE_FRAG:
894 if (!first_frag)
895 break;
896 if (cevent->stream == sid &&
897 cevent->mid == sin->mid &&
898 cevent->fsn == next_fsn) {
899 next_fsn++;
900 last_frag = pos;
901 } else {
902 goto out;
903 }
904 break;
905 case SCTP_DATA_LAST_FRAG:
906 if (first_frag)
907 goto out;
908 break;
909 default:
910 break;
911 }
912 }
913
914 if (!first_frag)
915 return NULL;
916
917out:
918 retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
919 &ulpq->reasm, first_frag,
920 last_frag);
921 if (retval) {
922 sin->fsn = next_fsn;
923 sin->pd_mode = 1;
924 }
925
926 return retval;
927}
928
929static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
930{
931 struct sctp_ulpevent *event;
932
933 if (!skb_queue_empty(&ulpq->reasm)) {
934 do {
935 event = sctp_intl_retrieve_first(ulpq);
936 if (event)
937 sctp_enqueue_event(ulpq, event);
938 } while (event);
939 }
940
941 if (!skb_queue_empty(&ulpq->reasm_uo)) {
942 do {
943 event = sctp_intl_retrieve_first_uo(ulpq);
944 if (event)
945 sctp_enqueue_event(ulpq, event);
946 } while (event);
947 }
948}
949
950static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
951 gfp_t gfp)
952{
953 struct sctp_association *asoc = ulpq->asoc;
954 __u32 freed = 0;
955 __u16 needed;
956
957 if (chunk) {
958 needed = ntohs(chunk->chunk_hdr->length);
959 needed -= sizeof(struct sctp_idata_chunk);
960 } else {
961 needed = SCTP_DEFAULT_MAXWINDOW;
962 }
963
964 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
965 freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
966 if (freed < needed)
967 freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm,
968 needed);
969 if (freed < needed)
970 freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm_uo,
971 needed);
972 }
973
974 if (chunk && freed >= needed)
975 if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
976 sctp_intl_start_pd(ulpq, gfp);
977
978 sk_mem_reclaim(asoc->base.sk);
979}
980
981static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
982 __u32 mid, __u16 flags, gfp_t gfp)
983{
984 struct sock *sk = ulpq->asoc->base.sk;
985 struct sctp_ulpevent *ev = NULL;
986
987 if (!sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
988 &sctp_sk(sk)->subscribe))
989 return;
990
991 ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED,
992 sid, mid, flags, gfp);
993 if (ev) {
994 __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
995
996 if (!sctp_sk(sk)->data_ready_signalled) {
997 sctp_sk(sk)->data_ready_signalled = 1;
998 sk->sk_data_ready(sk);
999 }
1000 }
1001}
1002
1003static void sctp_intl_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
1004{
1005 struct sctp_stream *stream = &ulpq->asoc->stream;
1006 struct sctp_ulpevent *cevent, *event = NULL;
1007 struct sk_buff_head *lobby = &ulpq->lobby;
1008 struct sk_buff *pos, *tmp;
1009 struct sk_buff_head temp;
1010 __u16 csid;
1011 __u32 cmid;
1012
1013 skb_queue_head_init(&temp);
1014 sctp_skb_for_each(pos, lobby, tmp) {
1015 cevent = (struct sctp_ulpevent *)pos->cb;
1016 csid = cevent->stream;
1017 cmid = cevent->mid;
1018
1019 if (csid > sid)
1020 break;
1021
1022 if (csid < sid)
1023 continue;
1024
1025 if (!MID_lt(cmid, sctp_mid_peek(stream, in, csid)))
1026 break;
1027
1028 __skb_unlink(pos, lobby);
1029 if (!event)
1030 event = sctp_skb2event(pos);
1031
1032 __skb_queue_tail(&temp, pos);
1033 }
1034
1035 if (!event && pos != (struct sk_buff *)lobby) {
1036 cevent = (struct sctp_ulpevent *)pos->cb;
1037 csid = cevent->stream;
1038 cmid = cevent->mid;
1039
1040 if (csid == sid && cmid == sctp_mid_peek(stream, in, csid)) {
1041 sctp_mid_next(stream, in, csid);
1042 __skb_unlink(pos, lobby);
1043 __skb_queue_tail(&temp, pos);
1044 event = sctp_skb2event(pos);
1045 }
1046 }
1047
1048 if (event) {
1049 sctp_intl_retrieve_ordered(ulpq, event);
1050 sctp_enqueue_event(ulpq, event);
1051 }
1052}
1053
1054static void sctp_intl_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
1055{
1056 struct sctp_stream *stream = &ulpq->asoc->stream;
1057 __u16 sid;
1058
1059 for (sid = 0; sid < stream->incnt; sid++) {
1060 struct sctp_stream_in *sin = &stream->in[sid];
1061 __u32 mid;
1062
1063 if (sin->pd_mode_uo) {
1064 sin->pd_mode_uo = 0;
1065
1066 mid = sin->mid_uo;
1067 sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1, gfp);
1068 }
1069
1070 if (sin->pd_mode) {
1071 sin->pd_mode = 0;
1072
1073 mid = sin->mid;
1074 sctp_intl_stream_abort_pd(ulpq, sid, mid, 0, gfp);
1075 sctp_mid_skip(stream, in, sid, mid);
1076
1077 sctp_intl_reap_ordered(ulpq, sid);
1078 }
1079 }
1080
1081 /* intl abort pd happens only when all data needs to be cleaned */
1082 sctp_ulpq_flush(ulpq);
1083}
1084
1085static inline int sctp_get_skip_pos(struct sctp_ifwdtsn_skip *skiplist,
1086 int nskips, __be16 stream, __u8 flags)
1087{
1088 int i;
1089
1090 for (i = 0; i < nskips; i++)
1091 if (skiplist[i].stream == stream &&
1092 skiplist[i].flags == flags)
1093 return i;
1094
1095 return i;
1096}
1097
1098#define SCTP_FTSN_U_BIT 0x1
1099static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
1100{
1101 struct sctp_ifwdtsn_skip ftsn_skip_arr[10];
1102 struct sctp_association *asoc = q->asoc;
1103 struct sctp_chunk *ftsn_chunk = NULL;
1104 struct list_head *lchunk, *temp;
1105 int nskips = 0, skip_pos;
1106 struct sctp_chunk *chunk;
1107 __u32 tsn;
1108
1109 if (!asoc->peer.prsctp_capable)
1110 return;
1111
1112 if (TSN_lt(asoc->adv_peer_ack_point, ctsn))
1113 asoc->adv_peer_ack_point = ctsn;
1114
1115 list_for_each_safe(lchunk, temp, &q->abandoned) {
1116 chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list);
1117 tsn = ntohl(chunk->subh.data_hdr->tsn);
1118
1119 if (TSN_lte(tsn, ctsn)) {
1120 list_del_init(lchunk);
1121 sctp_chunk_free(chunk);
1122 } else if (TSN_lte(tsn, asoc->adv_peer_ack_point + 1)) {
1123 __be16 sid = chunk->subh.idata_hdr->stream;
1124 __be32 mid = chunk->subh.idata_hdr->mid;
1125 __u8 flags = 0;
1126
1127 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
1128 flags |= SCTP_FTSN_U_BIT;
1129
1130 asoc->adv_peer_ack_point = tsn;
1131 skip_pos = sctp_get_skip_pos(&ftsn_skip_arr[0], nskips,
1132 sid, flags);
1133 ftsn_skip_arr[skip_pos].stream = sid;
1134 ftsn_skip_arr[skip_pos].reserved = 0;
1135 ftsn_skip_arr[skip_pos].flags = flags;
1136 ftsn_skip_arr[skip_pos].mid = mid;
1137 if (skip_pos == nskips)
1138 nskips++;
1139 if (nskips == 10)
1140 break;
1141 } else {
1142 break;
1143 }
1144 }
1145
1146 if (asoc->adv_peer_ack_point > ctsn)
1147 ftsn_chunk = sctp_make_ifwdtsn(asoc, asoc->adv_peer_ack_point,
1148 nskips, &ftsn_skip_arr[0]);
1149
1150 if (ftsn_chunk) {
1151 list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
1152 SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
1153 }
1154}
1155
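/*
 * Example of the skip list built above (illustrative values):
 * abandoning chunks on ordered stream 3 (last abandoned MID 7) and on
 * unordered stream 3 (last abandoned MID 4) yields two entries,
 * { .stream = 3, .flags = 0, .mid = 7 } and
 * { .stream = 3, .flags = SCTP_FTSN_U_BIT, .mid = 4 }, carried in one
 * I-FORWARD-TSN chunk; at most ten such entries are generated per
 * chunk, as the nskips cap shows.
 */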
1156#define _sctp_walk_ifwdtsn(pos, chunk, end) \
1157 for (pos = chunk->subh.ifwdtsn_hdr->skip; \
1158 (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++)
1159
1160#define sctp_walk_ifwdtsn(pos, ch) \
1161 _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \
1162 sizeof(struct sctp_ifwdtsn_chunk))
1163
1164static bool sctp_validate_fwdtsn(struct sctp_chunk *chunk)
1165{
1166 struct sctp_fwdtsn_skip *skip;
1167 __u16 incnt;
1168
1169 if (chunk->chunk_hdr->type != SCTP_CID_FWD_TSN)
1170 return false;
1171
1172 incnt = chunk->asoc->stream.incnt;
1173 sctp_walk_fwdtsn(skip, chunk)
1174 if (ntohs(skip->stream) >= incnt)
1175 return false;
1176
1177 return true;
1178}
1179
1180static bool sctp_validate_iftsn(struct sctp_chunk *chunk)
1181{
1182 struct sctp_ifwdtsn_skip *skip;
1183 __u16 incnt;
1184
1185 if (chunk->chunk_hdr->type != SCTP_CID_I_FWD_TSN)
1186 return false;
1187
1188 incnt = chunk->asoc->stream.incnt;
1189 sctp_walk_ifwdtsn(skip, chunk)
1190 if (ntohs(skip->stream) >= incnt)
1191 return false;
1192
1193 return true;
1194}
1195
1196static void sctp_report_fwdtsn(struct sctp_ulpq *ulpq, __u32 ftsn)
1197{
1198	/* Move the Cumulative TSN Ack ahead. */
1199 sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn);
1200 /* purge the fragmentation queue */
1201 sctp_ulpq_reasm_flushtsn(ulpq, ftsn);
1202 /* Abort any in progress partial delivery. */
1203 sctp_ulpq_abort_pd(ulpq, GFP_ATOMIC);
1204}
1205
1206static void sctp_intl_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 ftsn)
1207{
1208 struct sk_buff *pos, *tmp;
1209
1210 skb_queue_walk_safe(&ulpq->reasm, pos, tmp) {
1211 struct sctp_ulpevent *event = sctp_skb2event(pos);
1212 __u32 tsn = event->tsn;
1213
1214 if (TSN_lte(tsn, ftsn)) {
1215 __skb_unlink(pos, &ulpq->reasm);
1216 sctp_ulpevent_free(event);
1217 }
1218 }
1219
1220 skb_queue_walk_safe(&ulpq->reasm_uo, pos, tmp) {
1221 struct sctp_ulpevent *event = sctp_skb2event(pos);
1222 __u32 tsn = event->tsn;
1223
1224 if (TSN_lte(tsn, ftsn)) {
1225 __skb_unlink(pos, &ulpq->reasm_uo);
1226 sctp_ulpevent_free(event);
1227 }
1228 }
1229}
1230
1231static void sctp_report_iftsn(struct sctp_ulpq *ulpq, __u32 ftsn)
1232{
1233	/* Move the Cumulative TSN Ack ahead. */
1234 sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn);
1235 /* purge the fragmentation queue */
1236 sctp_intl_reasm_flushtsn(ulpq, ftsn);
1237 /* abort only when it's for all data */
1238 if (ftsn == sctp_tsnmap_get_max_tsn_seen(&ulpq->asoc->peer.tsn_map))
1239 sctp_intl_abort_pd(ulpq, GFP_ATOMIC);
1240}
1241
1242static void sctp_handle_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
1243{
1244 struct sctp_fwdtsn_skip *skip;
1245
1246 /* Walk through all the skipped SSNs */
1247 sctp_walk_fwdtsn(skip, chunk)
1248 sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
1249}
1250
1251static void sctp_intl_skip(struct sctp_ulpq *ulpq, __u16 sid, __u32 mid,
1252 __u8 flags)
1253{
1254 struct sctp_stream_in *sin = sctp_stream_in(ulpq->asoc, sid);
1255 struct sctp_stream *stream = &ulpq->asoc->stream;
1256
1257 if (flags & SCTP_FTSN_U_BIT) {
1258 if (sin->pd_mode_uo && MID_lt(sin->mid_uo, mid)) {
1259 sin->pd_mode_uo = 0;
1260 sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1,
1261 GFP_ATOMIC);
1262 }
1263 return;
1264 }
1265
1266 if (MID_lt(mid, sctp_mid_peek(stream, in, sid)))
1267 return;
1268
1269 if (sin->pd_mode) {
1270 sin->pd_mode = 0;
1271 sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x0, GFP_ATOMIC);
1272 }
1273
1274 sctp_mid_skip(stream, in, sid, mid);
1275
1276 sctp_intl_reap_ordered(ulpq, sid);
1277}
1278
1279static void sctp_handle_iftsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
1280{
1281 struct sctp_ifwdtsn_skip *skip;
1282
1283 /* Walk through all the skipped MIDs and abort stream pd if possible */
1284 sctp_walk_ifwdtsn(skip, chunk)
1285 sctp_intl_skip(ulpq, ntohs(skip->stream),
1286 ntohl(skip->mid), skip->flags);
1287}
1288
1289static struct sctp_stream_interleave sctp_stream_interleave_0 = {
1290 .data_chunk_len = sizeof(struct sctp_data_chunk),
1291 .ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk),
1292 /* DATA process functions */
1293 .make_datafrag = sctp_make_datafrag_empty,
1294 .assign_number = sctp_chunk_assign_ssn,
1295 .validate_data = sctp_validate_data,
1296 .ulpevent_data = sctp_ulpq_tail_data,
1297 .enqueue_event = sctp_ulpq_tail_event,
1298 .renege_events = sctp_ulpq_renege,
1299 .start_pd = sctp_ulpq_partial_delivery,
1300 .abort_pd = sctp_ulpq_abort_pd,
1301 /* FORWARD-TSN process functions */
1302 .generate_ftsn = sctp_generate_fwdtsn,
1303 .validate_ftsn = sctp_validate_fwdtsn,
1304 .report_ftsn = sctp_report_fwdtsn,
1305 .handle_ftsn = sctp_handle_fwdtsn,
1306};
1307
1308static struct sctp_stream_interleave sctp_stream_interleave_1 = {
1309 .data_chunk_len = sizeof(struct sctp_idata_chunk),
1310 .ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk),
1311 /* I-DATA process functions */
1312 .make_datafrag = sctp_make_idatafrag_empty,
1313 .assign_number = sctp_chunk_assign_mid,
1314 .validate_data = sctp_validate_idata,
1315 .ulpevent_data = sctp_ulpevent_idata,
1316 .enqueue_event = sctp_enqueue_event,
1317 .renege_events = sctp_renege_events,
1318 .start_pd = sctp_intl_start_pd,
1319 .abort_pd = sctp_intl_abort_pd,
1320 /* I-FORWARD-TSN process functions */
1321 .generate_ftsn = sctp_generate_iftsn,
1322 .validate_ftsn = sctp_validate_iftsn,
1323 .report_ftsn = sctp_report_iftsn,
1324 .handle_ftsn = sctp_handle_iftsn,
1325};
1326
1327void sctp_stream_interleave_init(struct sctp_stream *stream)
1328{
1329 struct sctp_association *asoc;
1330
1331 asoc = container_of(stream, struct sctp_association, stream);
1332 stream->si = asoc->intl_enable ? &sctp_stream_interleave_1
1333 : &sctp_stream_interleave_0;
1334}
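/*
 * The two ops tables above are the heart of the change: every place
 * that used to hardcode DATA/FORWARD-TSN behaviour now dispatches
 * through asoc->stream.si, e.g. (from the socket.c hunk earlier)
 *
 *	asoc->stream.si->enqueue_event(&asoc->ulpq, event);
 *
 * which resolves to sctp_ulpq_tail_event() on a plain association and
 * to sctp_enqueue_event() once I-DATA interleaving has been negotiated.
 */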
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index d8c162a4089c..f5fcd425232a 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -242,7 +242,8 @@ int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
 
 void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
 {
-	if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+	if (!list_is_last(&ch->frag_list, &ch->msg->chunks) &&
+	    !q->asoc->intl_enable) {
 		struct sctp_stream_out *sout;
 		__u16 sid;
 
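/*
 * Rationale for the added condition: without interleaving, all
 * fragments of one message must leave back-to-back, so the scheduler
 * keeps the current stream selected until the last fragment is sent;
 * with intl_enable the I-DATA MID/FSN machinery lets fragments of
 * different messages interleave safely, so the scheduler may switch
 * streams between fragments.
 */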
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index ef7ca44d6e6a..33ca5b73cdb3 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -289,6 +289,13 @@ static struct ctl_table sctp_net_table[] = {
289 .proc_handler = proc_sctp_do_auth, 289 .proc_handler = proc_sctp_do_auth,
290 }, 290 },
291 { 291 {
292 .procname = "intl_enable",
293 .data = &init_net.sctp.intl_enable,
294 .maxlen = sizeof(int),
295 .mode = 0644,
296 .proc_handler = proc_dointvec,
297 },
298 {
292 .procname = "addr_scope_policy", 299 .procname = "addr_scope_policy",
293 .data = &init_net.sctp.scope_policy, 300 .data = &init_net.sctp.scope_policy,
294 .maxlen = sizeof(int), 301 .maxlen = sizeof(int),
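
The new ctl_table entry surfaces as a standard proc_dointvec knob; given the .procname above it should appear as /proc/sys/net/sctp/intl_enable. A short userspace sketch toggling it (path inferred from the hunk, requires root):

	#include <stdio.h>

	int main(void)
	{
		/* Path follows from the .procname in the sctp net table above. */
		FILE *f = fopen("/proc/sys/net/sctp/intl_enable", "w");

		if (!f) { perror("fopen"); return 1; }
		fputs("1\n", f);   /* allow I-DATA negotiation for new associations */
		fclose(f);
		return 0;
	}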
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5447228bf1a0..84207ad33e8e 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -443,8 +443,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
443 goto fail; 443 goto fail;
444 444
445 /* Pull off the common chunk header and DATA header. */ 445 /* Pull off the common chunk header and DATA header. */
446 skb_pull(skb, sizeof(struct sctp_data_chunk)); 446 skb_pull(skb, sctp_datachk_len(&asoc->stream));
447 len -= sizeof(struct sctp_data_chunk); 447 len -= sctp_datachk_len(&asoc->stream);
448 448
449 /* Embed the event fields inside the cloned skb. */ 449 /* Embed the event fields inside the cloned skb. */
450 event = sctp_skb2event(skb); 450 event = sctp_skb2event(skb);
@@ -705,8 +705,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
705 sctp_ulpevent_receive_data(event, asoc); 705 sctp_ulpevent_receive_data(event, asoc);
706 706
707 event->stream = ntohs(chunk->subh.data_hdr->stream); 707 event->stream = ntohs(chunk->subh.data_hdr->stream);
708 event->ssn = ntohs(chunk->subh.data_hdr->ssn);
709 event->ppid = chunk->subh.data_hdr->ppid;
710 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { 708 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
711 event->flags |= SCTP_UNORDERED; 709 event->flags |= SCTP_UNORDERED;
712 event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); 710 event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
@@ -732,8 +730,9 @@ fail:
732 * various events. 730 * various events.
733 */ 731 */
734struct sctp_ulpevent *sctp_ulpevent_make_pdapi( 732struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
735 const struct sctp_association *asoc, __u32 indication, 733 const struct sctp_association *asoc,
736 gfp_t gfp) 734 __u32 indication, __u32 sid, __u32 seq,
735 __u32 flags, gfp_t gfp)
737{ 736{
738 struct sctp_ulpevent *event; 737 struct sctp_ulpevent *event;
739 struct sctp_pdapi_event *pd; 738 struct sctp_pdapi_event *pd;
@@ -754,7 +753,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
754 * Currently unused. 753 * Currently unused.
755 */ 754 */
756 pd->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT; 755 pd->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
757 pd->pdapi_flags = 0; 756 pd->pdapi_flags = flags;
757 pd->pdapi_stream = sid;
758 pd->pdapi_seq = seq;
758 759
759 /* pdapi_length: 32 bits (unsigned integer) 760 /* pdapi_length: 32 bits (unsigned integer)
760 * 761 *
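
With the widened sctp_ulpevent_make_pdapi() signature, the partial-delivery notification can now tell userspace which stream and sequence number were cut short. A sketch of consuming those fields; the struct here is an abbreviated local mirror assumed from the assignments in the hunk, not copied from a UAPI header:

	#include <stdint.h>
	#include <stdio.h>

	struct pdapi_event {
		uint16_t pdapi_type;        /* SCTP_PARTIAL_DELIVERY_EVENT */
		uint16_t pdapi_flags;
		uint32_t pdapi_length;
		uint32_t pdapi_indication;  /* e.g. SCTP_PARTIAL_DELIVERY_ABORTED */
		uint32_t pdapi_stream;      /* new: stream whose delivery was aborted */
		uint32_t pdapi_seq;         /* new: SSN/MID of the abandoned message */
	};

	static void handle_pdapi(const struct pdapi_event *pd)
	{
		fprintf(stderr, "partial delivery aborted: stream %u, seq %u\n",
			(unsigned)pd->pdapi_stream, (unsigned)pd->pdapi_seq);
	}

	int main(void)
	{
		struct pdapi_event ev = { .pdapi_stream = 3, .pdapi_seq = 17 };

		handle_pdapi(&ev);
		return 0;
	}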
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index e36ec5dd64c6..0b427100b0d4 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -60,6 +60,7 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
60 60
61 ulpq->asoc = asoc; 61 ulpq->asoc = asoc;
62 skb_queue_head_init(&ulpq->reasm); 62 skb_queue_head_init(&ulpq->reasm);
63 skb_queue_head_init(&ulpq->reasm_uo);
63 skb_queue_head_init(&ulpq->lobby); 64 skb_queue_head_init(&ulpq->lobby);
64 ulpq->pd_mode = 0; 65 ulpq->pd_mode = 0;
65 66
@@ -83,6 +84,10 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
83 sctp_ulpevent_free(event); 84 sctp_ulpevent_free(event);
84 } 85 }
85 86
87 while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) {
88 event = sctp_skb2event(skb);
89 sctp_ulpevent_free(event);
90 }
86} 91}
87 92
88/* Dispose of a ulpqueue. */ 93/* Dispose of a ulpqueue. */
@@ -104,6 +109,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
104 if (!event) 109 if (!event)
105 return -ENOMEM; 110 return -ENOMEM;
106 111
112 event->ssn = ntohs(chunk->subh.data_hdr->ssn);
113 event->ppid = chunk->subh.data_hdr->ppid;
114
107 /* Do reassembly if needed. */ 115 /* Do reassembly if needed. */
108 event = sctp_ulpq_reasm(ulpq, event); 116 event = sctp_ulpq_reasm(ulpq, event);
109 117
@@ -328,9 +336,10 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
328 * payload was fragmented on the way and ip had to reassemble them. 336 * payload was fragmented on the way and ip had to reassemble them.
329 * We add the rest of skb's to the first skb's fraglist. 337 * We add the rest of skb's to the first skb's fraglist.
330 */ 338 */
331static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, 339struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
332 struct sk_buff_head *queue, struct sk_buff *f_frag, 340 struct sk_buff_head *queue,
333 struct sk_buff *l_frag) 341 struct sk_buff *f_frag,
342 struct sk_buff *l_frag)
334{ 343{
335 struct sk_buff *pos; 344 struct sk_buff *pos;
336 struct sk_buff *new = NULL; 345 struct sk_buff *new = NULL;
@@ -853,7 +862,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
853 struct sctp_stream *stream; 862 struct sctp_stream *stream;
854 863
855 /* Check if this message needs ordering. */ 864 /* Check if this message needs ordering. */
856 if (SCTP_DATA_UNORDERED & event->msg_flags) 865 if (event->msg_flags & SCTP_DATA_UNORDERED)
857 return event; 866 return event;
858 867
859 /* Note: The stream ID must be verified before this routine. */ 868 /* Note: The stream ID must be verified before this routine. */
@@ -974,8 +983,8 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
974 sctp_ulpq_reap_ordered(ulpq, sid); 983 sctp_ulpq_reap_ordered(ulpq, sid);
975} 984}
976 985
977static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, 986__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list,
978 struct sk_buff_head *list, __u16 needed) 987 __u16 needed)
979{ 988{
980 __u16 freed = 0; 989 __u16 freed = 0;
981 __u32 tsn, last_tsn; 990 __u32 tsn, last_tsn;
@@ -1132,7 +1141,7 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
1132 &sctp_sk(sk)->subscribe)) 1141 &sctp_sk(sk)->subscribe))
1133 ev = sctp_ulpevent_make_pdapi(ulpq->asoc, 1142 ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
1134 SCTP_PARTIAL_DELIVERY_ABORTED, 1143 SCTP_PARTIAL_DELIVERY_ABORTED,
1135 gfp); 1144 0, 0, 0, gfp);
1136 if (ev) 1145 if (ev)
1137 __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); 1146 __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
1138 1147
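
sctp_ulpq_flush() now drains a second queue, reasm_uo, which holds unordered I-DATA fragments alongside the existing ordered reassembly queue; the added loop is the usual dequeue-and-free pattern. A small self-contained analogue of that pattern with a plain singly-linked queue:

	#include <stdio.h>
	#include <stdlib.h>

	struct node { struct node *next; int id; };

	/* Dequeue-and-free until empty: the same shape as the
	 * __skb_dequeue()/sctp_ulpevent_free() loops added above. */
	static void flush(struct node **q, const char *name)
	{
		struct node *n;

		while ((n = *q) != NULL) {
			*q = n->next;
			printf("freeing %s event %d\n", name, n->id);
			free(n);
		}
	}

	int main(void)
	{
		struct node *reasm = NULL, *reasm_uo = NULL;
		struct node *n = malloc(sizeof(*n));

		n->next = NULL; n->id = 1; reasm_uo = n;
		flush(&reasm, "ordered");       /* empty: no-op */
		flush(&reasm_uo, "unordered");  /* frees event 1 */
		return 0;
	}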
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6451c5013e06..da1a5cdefd13 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
115 goto out; 115 goto out;
116 116
117 smc = smc_sk(sk); 117 smc = smc_sk(sk);
118 sock_hold(sk);
119 if (sk->sk_state == SMC_LISTEN) 118 if (sk->sk_state == SMC_LISTEN)
120 /* smc_close_non_accepted() is called and acquires 119 /* smc_close_non_accepted() is called and acquires
121 * sock lock for child sockets again 120 * sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
124 else 123 else
125 lock_sock(sk); 124 lock_sock(sk);
126 125
127 if (smc->use_fallback) { 126 if (!smc->use_fallback) {
128 sk->sk_state = SMC_CLOSED;
129 sk->sk_state_change(sk);
130 } else {
131 rc = smc_close_active(smc); 127 rc = smc_close_active(smc);
132 sock_set_flag(sk, SOCK_DEAD); 128 sock_set_flag(sk, SOCK_DEAD);
133 sk->sk_shutdown |= SHUTDOWN_MASK; 129 sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
136 sock_release(smc->clcsock); 132 sock_release(smc->clcsock);
137 smc->clcsock = NULL; 133 smc->clcsock = NULL;
138 } 134 }
135 if (smc->use_fallback) {
136 sock_put(sk); /* passive closing */
137 sk->sk_state = SMC_CLOSED;
138 sk->sk_state_change(sk);
139 }
139 140
140 /* detach socket */ 141 /* detach socket */
141 sock_orphan(sk); 142 sock_orphan(sk);
142 sock->sk = NULL; 143 sock->sk = NULL;
143 if (smc->use_fallback) { 144 if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
144 schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
145 } else if (sk->sk_state == SMC_CLOSED) {
146 smc_conn_free(&smc->conn); 145 smc_conn_free(&smc->conn);
147 schedule_delayed_work(&smc->sock_put_work,
148 SMC_CLOSE_SOCK_PUT_DELAY);
149 }
150 release_sock(sk); 146 release_sock(sk);
151 147
152 sock_put(sk); 148 sk->sk_prot->unhash(sk);
149 sock_put(sk); /* final sock_put */
153out: 150out:
154 return rc; 151 return rc;
155} 152}
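
The rewritten release path drops the delayed sock_put_work in favour of explicit reference pairing: each sock_hold() is annotated with its matching sock_put(), and the final put frees the socket. A sketch of that discipline with a plain C11 atomic counter (simplified; no real socket state):

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct sock { atomic_int refcnt; };

	static void sock_hold(struct sock *sk) { atomic_fetch_add(&sk->refcnt, 1); }

	static void sock_put(struct sock *sk)
	{
		/* last put frees -- mirrors the "final sock_put" comments above */
		if (atomic_fetch_sub(&sk->refcnt, 1) == 1) {
			printf("freeing sock\n");
			free(sk);
		}
	}

	int main(void)
	{
		struct sock *sk = malloc(sizeof(*sk));

		atomic_init(&sk->refcnt, 1);    /* creation reference */
		sock_hold(sk);                  /* e.g. taken for passive closing */
		sock_put(sk);                   /* passive closing done */
		sock_put(sk);                   /* final sock_put */
		return 0;
	}

The annotated pairs make leaks auditable by inspection, which is why the diff adds a comment at nearly every hold/put site.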
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
181 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 178 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
182 INIT_LIST_HEAD(&smc->accept_q); 179 INIT_LIST_HEAD(&smc->accept_q);
183 spin_lock_init(&smc->accept_q_lock); 180 spin_lock_init(&smc->accept_q_lock);
184 INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
185 sk->sk_prot->hash(sk); 181 sk->sk_prot->hash(sk);
186 sk_refcnt_debug_inc(sk); 182 sk_refcnt_debug_inc(sk);
187 183
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
377 link->peer_mtu = clc->qp_mtu; 373 link->peer_mtu = clc->qp_mtu;
378} 374}
379 375
376static void smc_lgr_forget(struct smc_link_group *lgr)
377{
378 spin_lock_bh(&smc_lgr_list.lock);
379 /* do not use this link group for new connections */
380 if (!list_empty(&lgr->list))
381 list_del_init(&lgr->list);
382 spin_unlock_bh(&smc_lgr_list.lock);
383}
384
380/* setup for RDMA connection of client */ 385/* setup for RDMA connection of client */
381static int smc_connect_rdma(struct smc_sock *smc) 386static int smc_connect_rdma(struct smc_sock *smc)
382{ 387{
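
smc_lgr_forget() unlinks a link group under the global list lock so no new connection can reuse it; list_del_init() plus the list_empty() check make the unlink idempotent, which matters because both error labels below may reach it. A minimal analogue of a race-safe, call-twice-safe removal:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static int on_list = 1;     /* stand-in for !list_empty(&lgr->list) */

	/* Idempotent unlink under a lock, as in smc_lgr_forget() above. */
	static void lgr_forget(void)
	{
		pthread_mutex_lock(&lock);
		if (on_list) {
			on_list = 0;
			puts("link group removed from reuse list");
		}
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		lgr_forget();   /* removes */
		lgr_forget();   /* harmless second call */
		return 0;
	}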
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
390 int rc = 0; 395 int rc = 0;
391 u8 ibport; 396 u8 ibport;
392 397
398 sock_hold(&smc->sk); /* sock put in passive closing */
399
393 if (!tcp_sk(smc->clcsock->sk)->syn_smc) { 400 if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
394 /* peer has not signalled SMC-capability */ 401 /* peer has not signalled SMC-capability */
395 smc->use_fallback = true; 402 smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
513 return rc ? rc : local_contact; 520 return rc ? rc : local_contact;
514 521
515decline_rdma_unlock: 522decline_rdma_unlock:
523 if (local_contact == SMC_FIRST_CONTACT)
524 smc_lgr_forget(smc->conn.lgr);
516 mutex_unlock(&smc_create_lgr_pending); 525 mutex_unlock(&smc_create_lgr_pending);
517 smc_conn_free(&smc->conn); 526 smc_conn_free(&smc->conn);
518decline_rdma: 527decline_rdma:
@@ -520,15 +529,19 @@ decline_rdma:
520 smc->use_fallback = true; 529 smc->use_fallback = true;
521 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 530 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
522 rc = smc_clc_send_decline(smc, reason_code); 531 rc = smc_clc_send_decline(smc, reason_code);
523 if (rc < sizeof(struct smc_clc_msg_decline)) 532 if (rc < 0)
524 goto out_err; 533 goto out_err;
525 } 534 }
526 goto out_connected; 535 goto out_connected;
527 536
528out_err_unlock: 537out_err_unlock:
538 if (local_contact == SMC_FIRST_CONTACT)
539 smc_lgr_forget(smc->conn.lgr);
529 mutex_unlock(&smc_create_lgr_pending); 540 mutex_unlock(&smc_create_lgr_pending);
530 smc_conn_free(&smc->conn); 541 smc_conn_free(&smc->conn);
531out_err: 542out_err:
543 if (smc->sk.sk_state == SMC_INIT)
544 sock_put(&smc->sk); /* passive closing */
532 return rc; 545 return rc;
533} 546}
534 547
@@ -581,40 +594,33 @@ out_err:
581 594
582static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) 595static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
583{ 596{
584 struct sock *sk = &lsmc->sk; 597 struct socket *new_clcsock = NULL;
585 struct socket *new_clcsock; 598 struct sock *lsk = &lsmc->sk;
586 struct sock *new_sk; 599 struct sock *new_sk;
587 int rc; 600 int rc;
588 601
589 release_sock(&lsmc->sk); 602 release_sock(lsk);
590 new_sk = smc_sock_alloc(sock_net(sk), NULL); 603 new_sk = smc_sock_alloc(sock_net(lsk), NULL);
591 if (!new_sk) { 604 if (!new_sk) {
592 rc = -ENOMEM; 605 rc = -ENOMEM;
593 lsmc->sk.sk_err = ENOMEM; 606 lsk->sk_err = ENOMEM;
594 *new_smc = NULL; 607 *new_smc = NULL;
595 lock_sock(&lsmc->sk); 608 lock_sock(lsk);
596 goto out; 609 goto out;
597 } 610 }
598 *new_smc = smc_sk(new_sk); 611 *new_smc = smc_sk(new_sk);
599 612
600 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); 613 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
601 lock_sock(&lsmc->sk); 614 lock_sock(lsk);
602 if (rc < 0) { 615 if (rc < 0)
603 lsmc->sk.sk_err = -rc; 616 lsk->sk_err = -rc;
604 new_sk->sk_state = SMC_CLOSED; 617 if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
605 sock_set_flag(new_sk, SOCK_DEAD);
606 sk->sk_prot->unhash(new_sk);
607 sock_put(new_sk);
608 *new_smc = NULL;
609 goto out;
610 }
611 if (lsmc->sk.sk_state == SMC_CLOSED) {
612 if (new_clcsock) 618 if (new_clcsock)
613 sock_release(new_clcsock); 619 sock_release(new_clcsock);
614 new_sk->sk_state = SMC_CLOSED; 620 new_sk->sk_state = SMC_CLOSED;
615 sock_set_flag(new_sk, SOCK_DEAD); 621 sock_set_flag(new_sk, SOCK_DEAD);
616 sk->sk_prot->unhash(new_sk); 622 new_sk->sk_prot->unhash(new_sk);
617 sock_put(new_sk); 623 sock_put(new_sk); /* final */
618 *new_smc = NULL; 624 *new_smc = NULL;
619 goto out; 625 goto out;
620 } 626 }
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
631{ 637{
632 struct smc_sock *par = smc_sk(parent); 638 struct smc_sock *par = smc_sk(parent);
633 639
634 sock_hold(sk); 640 sock_hold(sk); /* sock_put in smc_accept_unlink () */
635 spin_lock(&par->accept_q_lock); 641 spin_lock(&par->accept_q_lock);
636 list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); 642 list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
637 spin_unlock(&par->accept_q_lock); 643 spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
647 list_del_init(&smc_sk(sk)->accept_q); 653 list_del_init(&smc_sk(sk)->accept_q);
648 spin_unlock(&par->accept_q_lock); 654 spin_unlock(&par->accept_q_lock);
649 sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); 655 sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
650 sock_put(sk); 656 sock_put(sk); /* sock_hold in smc_accept_enqueue */
651} 657}
652 658
653/* remove a sock from the accept queue to bind it to a new socket created 659/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
664 670
665 smc_accept_unlink(new_sk); 671 smc_accept_unlink(new_sk);
666 if (new_sk->sk_state == SMC_CLOSED) { 672 if (new_sk->sk_state == SMC_CLOSED) {
673 if (isk->clcsock) {
674 sock_release(isk->clcsock);
675 isk->clcsock = NULL;
676 }
667 new_sk->sk_prot->unhash(new_sk); 677 new_sk->sk_prot->unhash(new_sk);
668 sock_put(new_sk); 678 sock_put(new_sk); /* final */
669 continue; 679 continue;
670 } 680 }
671 if (new_sock) 681 if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
680{ 690{
681 struct smc_sock *smc = smc_sk(sk); 691 struct smc_sock *smc = smc_sk(sk);
682 692
683 sock_hold(sk);
684 lock_sock(sk); 693 lock_sock(sk);
685 if (!sk->sk_lingertime) 694 if (!sk->sk_lingertime)
686 /* wait for peer closing */ 695 /* wait for peer closing */
687 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; 696 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
688 if (smc->use_fallback) { 697 if (!smc->use_fallback) {
689 sk->sk_state = SMC_CLOSED;
690 } else {
691 smc_close_active(smc); 698 smc_close_active(smc);
692 sock_set_flag(sk, SOCK_DEAD); 699 sock_set_flag(sk, SOCK_DEAD);
693 sk->sk_shutdown |= SHUTDOWN_MASK; 700 sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
700 sock_release(tcp); 707 sock_release(tcp);
701 } 708 }
702 if (smc->use_fallback) { 709 if (smc->use_fallback) {
703 schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); 710 sock_put(sk); /* passive closing */
704 } else if (sk->sk_state == SMC_CLOSED) { 711 sk->sk_state = SMC_CLOSED;
705 smc_conn_free(&smc->conn); 712 } else {
706 schedule_delayed_work(&smc->sock_put_work, 713 if (sk->sk_state == SMC_CLOSED)
707 SMC_CLOSE_SOCK_PUT_DELAY); 714 smc_conn_free(&smc->conn);
708 } 715 }
709 release_sock(sk); 716 release_sock(sk);
710 sock_put(sk); 717 sk->sk_prot->unhash(sk);
718 sock_put(sk); /* final sock_put */
711} 719}
712 720
713static int smc_serv_conf_first_link(struct smc_sock *smc) 721static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -751,14 +759,16 @@ static void smc_listen_work(struct work_struct *work)
751{ 759{
752 struct smc_sock *new_smc = container_of(work, struct smc_sock, 760 struct smc_sock *new_smc = container_of(work, struct smc_sock,
753 smc_listen_work); 761 smc_listen_work);
762 struct smc_clc_msg_proposal_prefix *pclc_prfx;
754 struct socket *newclcsock = new_smc->clcsock; 763 struct socket *newclcsock = new_smc->clcsock;
755 struct smc_sock *lsmc = new_smc->listen_smc; 764 struct smc_sock *lsmc = new_smc->listen_smc;
756 struct smc_clc_msg_accept_confirm cclc; 765 struct smc_clc_msg_accept_confirm cclc;
757 int local_contact = SMC_REUSE_CONTACT; 766 int local_contact = SMC_REUSE_CONTACT;
758 struct sock *newsmcsk = &new_smc->sk; 767 struct sock *newsmcsk = &new_smc->sk;
759 struct smc_clc_msg_proposal pclc; 768 struct smc_clc_msg_proposal *pclc;
760 struct smc_ib_device *smcibdev; 769 struct smc_ib_device *smcibdev;
761 struct sockaddr_in peeraddr; 770 struct sockaddr_in peeraddr;
771 u8 buf[SMC_CLC_MAX_LEN];
762 struct smc_link *link; 772 struct smc_link *link;
763 int reason_code = 0; 773 int reason_code = 0;
764 int rc = 0, len; 774 int rc = 0, len;
@@ -775,7 +785,7 @@ static void smc_listen_work(struct work_struct *work)
775 /* do inband token exchange - 785 /* do inband token exchange -
776 *wait for and receive SMC Proposal CLC message 786 *wait for and receive SMC Proposal CLC message
777 */ 787 */
778 reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), 788 reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
779 SMC_CLC_PROPOSAL); 789 SMC_CLC_PROPOSAL);
780 if (reason_code < 0) 790 if (reason_code < 0)
781 goto out_err; 791 goto out_err;
@@ -804,8 +814,11 @@ static void smc_listen_work(struct work_struct *work)
804 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 814 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
805 goto decline_rdma; 815 goto decline_rdma;
806 } 816 }
807 if ((pclc.outgoing_subnet != subnet) || 817
808 (pclc.prefix_len != prefix_len)) { 818 pclc = (struct smc_clc_msg_proposal *)&buf;
819 pclc_prfx = smc_clc_proposal_get_prefix(pclc);
820 if (pclc_prfx->outgoing_subnet != subnet ||
821 pclc_prfx->prefix_len != prefix_len) {
809 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 822 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
810 goto decline_rdma; 823 goto decline_rdma;
811 } 824 }
@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
816 /* allocate connection / link group */ 829 /* allocate connection / link group */
817 mutex_lock(&smc_create_lgr_pending); 830 mutex_lock(&smc_create_lgr_pending);
818 local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, 831 local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
819 smcibdev, ibport, &pclc.lcl, 0); 832 smcibdev, ibport, &pclc->lcl, 0);
820 if (local_contact < 0) { 833 if (local_contact < 0) {
821 rc = local_contact; 834 rc = local_contact;
822 if (rc == -ENOMEM) 835 if (rc == -ENOMEM)
@@ -879,11 +892,9 @@ static void smc_listen_work(struct work_struct *work)
879 } 892 }
880 /* QP confirmation over RoCE fabric */ 893 /* QP confirmation over RoCE fabric */
881 reason_code = smc_serv_conf_first_link(new_smc); 894 reason_code = smc_serv_conf_first_link(new_smc);
882 if (reason_code < 0) { 895 if (reason_code < 0)
883 /* peer is not aware of a problem */ 896 /* peer is not aware of a problem */
884 rc = reason_code;
885 goto out_err_unlock; 897 goto out_err_unlock;
886 }
887 if (reason_code > 0) 898 if (reason_code > 0)
888 goto decline_rdma_unlock; 899 goto decline_rdma_unlock;
889 } 900 }
@@ -910,21 +921,26 @@ enqueue:
910 return; 921 return;
911 922
912decline_rdma_unlock: 923decline_rdma_unlock:
924 if (local_contact == SMC_FIRST_CONTACT)
925 smc_lgr_forget(new_smc->conn.lgr);
913 mutex_unlock(&smc_create_lgr_pending); 926 mutex_unlock(&smc_create_lgr_pending);
914decline_rdma: 927decline_rdma:
915 /* RDMA setup failed, switch back to TCP */ 928 /* RDMA setup failed, switch back to TCP */
916 smc_conn_free(&new_smc->conn); 929 smc_conn_free(&new_smc->conn);
917 new_smc->use_fallback = true; 930 new_smc->use_fallback = true;
918 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 931 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
919 rc = smc_clc_send_decline(new_smc, reason_code); 932 if (smc_clc_send_decline(new_smc, reason_code) < 0)
920 if (rc < sizeof(struct smc_clc_msg_decline))
921 goto out_err; 933 goto out_err;
922 } 934 }
923 goto out_connected; 935 goto out_connected;
924 936
925out_err_unlock: 937out_err_unlock:
938 if (local_contact == SMC_FIRST_CONTACT)
939 smc_lgr_forget(new_smc->conn.lgr);
926 mutex_unlock(&smc_create_lgr_pending); 940 mutex_unlock(&smc_create_lgr_pending);
927out_err: 941out_err:
942 if (newsmcsk->sk_state == SMC_INIT)
943 sock_put(&new_smc->sk); /* passive closing */
928 newsmcsk->sk_state = SMC_CLOSED; 944 newsmcsk->sk_state = SMC_CLOSED;
929 smc_conn_free(&new_smc->conn); 945 smc_conn_free(&new_smc->conn);
930 goto enqueue; /* queue new sock with sk_err set */ 946 goto enqueue; /* queue new sock with sk_err set */
@@ -934,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
934{ 950{
935 struct smc_sock *lsmc = container_of(work, struct smc_sock, 951 struct smc_sock *lsmc = container_of(work, struct smc_sock,
936 tcp_listen_work); 952 tcp_listen_work);
953 struct sock *lsk = &lsmc->sk;
937 struct smc_sock *new_smc; 954 struct smc_sock *new_smc;
938 int rc = 0; 955 int rc = 0;
939 956
940 lock_sock(&lsmc->sk); 957 lock_sock(lsk);
941 while (lsmc->sk.sk_state == SMC_LISTEN) { 958 while (lsk->sk_state == SMC_LISTEN) {
942 rc = smc_clcsock_accept(lsmc, &new_smc); 959 rc = smc_clcsock_accept(lsmc, &new_smc);
943 if (rc) 960 if (rc)
944 goto out; 961 goto out;
@@ -947,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
947 964
948 new_smc->listen_smc = lsmc; 965 new_smc->listen_smc = lsmc;
949 new_smc->use_fallback = false; /* assume rdma capability first*/ 966 new_smc->use_fallback = false; /* assume rdma capability first*/
950 sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ 967 sock_hold(lsk); /* sock_put in smc_listen_work */
951 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 968 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
952 smc_copy_sock_settings_to_smc(new_smc); 969 smc_copy_sock_settings_to_smc(new_smc);
953 schedule_work(&new_smc->smc_listen_work); 970 sock_hold(&new_smc->sk); /* sock_put in passive closing */
971 if (!schedule_work(&new_smc->smc_listen_work))
972 sock_put(&new_smc->sk);
954 } 973 }
955 974
956out: 975out:
957 release_sock(&lsmc->sk); 976 if (lsmc->clcsock) {
958 lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ 977 sock_release(lsmc->clcsock);
978 lsmc->clcsock = NULL;
979 }
980 release_sock(lsk);
981 /* no more listening, wake up smc_close_wait_listen_clcsock and
982 * accept
983 */
984 lsk->sk_state_change(lsk);
985 sock_put(&lsmc->sk); /* sock_hold in smc_listen */
959} 986}
960 987
961static int smc_listen(struct socket *sock, int backlog) 988static int smc_listen(struct socket *sock, int backlog)
@@ -989,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
989 sk->sk_ack_backlog = 0; 1016 sk->sk_ack_backlog = 0;
990 sk->sk_state = SMC_LISTEN; 1017 sk->sk_state = SMC_LISTEN;
991 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 1018 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
992 schedule_work(&smc->tcp_listen_work); 1019 sock_hold(sk); /* sock_hold in tcp_listen_worker */
1020 if (!schedule_work(&smc->tcp_listen_work))
1021 sock_put(sk);
993 1022
994out: 1023out:
995 release_sock(sk); 1024 release_sock(sk);
@@ -1006,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
1006 int rc = 0; 1035 int rc = 0;
1007 1036
1008 lsmc = smc_sk(sk); 1037 lsmc = smc_sk(sk);
1038 sock_hold(sk); /* sock_put below */
1009 lock_sock(sk); 1039 lock_sock(sk);
1010 1040
1011 if (lsmc->sk.sk_state != SMC_LISTEN) { 1041 if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1040,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
1040 1070
1041out: 1071out:
1042 release_sock(sk); 1072 release_sock(sk);
1073 sock_put(sk); /* sock_hold above */
1043 return rc; 1074 return rc;
1044} 1075}
1045 1076
@@ -1107,78 +1138,84 @@ out:
1107 return rc; 1138 return rc;
1108} 1139}
1109 1140
1110static unsigned int smc_accept_poll(struct sock *parent) 1141static __poll_t smc_accept_poll(struct sock *parent)
1111{ 1142{
1112 struct smc_sock *isk; 1143 struct smc_sock *isk = smc_sk(parent);
1113 struct sock *sk; 1144 __poll_t mask = 0;
1114 1145
1115 lock_sock(parent); 1146 spin_lock(&isk->accept_q_lock);
1116 list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) { 1147 if (!list_empty(&isk->accept_q))
1117 sk = (struct sock *)isk; 1148 mask = EPOLLIN | EPOLLRDNORM;
1149 spin_unlock(&isk->accept_q_lock);
1118 1150
1119 if (sk->sk_state == SMC_ACTIVE) { 1151 return mask;
1120 release_sock(parent);
1121 return POLLIN | POLLRDNORM;
1122 }
1123 }
1124 release_sock(parent);
1125
1126 return 0;
1127} 1152}
1128 1153
1129static unsigned int smc_poll(struct file *file, struct socket *sock, 1154static __poll_t smc_poll(struct file *file, struct socket *sock,
1130 poll_table *wait) 1155 poll_table *wait)
1131{ 1156{
1132 struct sock *sk = sock->sk; 1157 struct sock *sk = sock->sk;
1133 unsigned int mask = 0; 1158 __poll_t mask = 0;
1134 struct smc_sock *smc; 1159 struct smc_sock *smc;
1135 int rc; 1160 int rc;
1136 1161
1162 if (!sk)
1163 return EPOLLNVAL;
1164
1137 smc = smc_sk(sock->sk); 1165 smc = smc_sk(sock->sk);
1166 sock_hold(sk);
1167 lock_sock(sk);
1138 if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { 1168 if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
1139 /* delegate to CLC child sock */ 1169 /* delegate to CLC child sock */
1170 release_sock(sk);
1140 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); 1171 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1141 /* if non-blocking connect finished ... */ 1172 /* if non-blocking connect finished ... */
1142 lock_sock(sk); 1173 lock_sock(sk);
1143 if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) { 1174 if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) {
1144 sk->sk_err = smc->clcsock->sk->sk_err; 1175 sk->sk_err = smc->clcsock->sk->sk_err;
1145 if (sk->sk_err) { 1176 if (sk->sk_err) {
1146 mask |= POLLERR; 1177 mask |= EPOLLERR;
1147 } else { 1178 } else {
1148 rc = smc_connect_rdma(smc); 1179 rc = smc_connect_rdma(smc);
1149 if (rc < 0) 1180 if (rc < 0)
1150 mask |= POLLERR; 1181 mask |= EPOLLERR;
1151 else 1182 /* success cases including fallback */
1152 /* success cases including fallback */ 1183 mask |= EPOLLOUT | EPOLLWRNORM;
1153 mask |= POLLOUT | POLLWRNORM;
1154 } 1184 }
1155 } 1185 }
1156 release_sock(sk);
1157 } else { 1186 } else {
1158 sock_poll_wait(file, sk_sleep(sk), wait); 1187 if (sk->sk_state != SMC_CLOSED) {
1159 if (sk->sk_state == SMC_LISTEN) 1188 release_sock(sk);
1160 /* woken up by sk_data_ready in smc_listen_work() */ 1189 sock_poll_wait(file, sk_sleep(sk), wait);
1161 mask |= smc_accept_poll(sk); 1190 lock_sock(sk);
1162 if (sk->sk_err)
1163 mask |= POLLERR;
1164 if (atomic_read(&smc->conn.sndbuf_space) ||
1165 (sk->sk_shutdown & SEND_SHUTDOWN)) {
1166 mask |= POLLOUT | POLLWRNORM;
1167 } else {
1168 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1169 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1170 } 1191 }
1171 if (atomic_read(&smc->conn.bytes_to_rcv)) 1192 if (sk->sk_err)
1172 mask |= POLLIN | POLLRDNORM; 1193 mask |= EPOLLERR;
1173 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 1194 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1174 (sk->sk_state == SMC_CLOSED)) 1195 (sk->sk_state == SMC_CLOSED))
1175 mask |= POLLHUP; 1196 mask |= EPOLLHUP;
1176 if (sk->sk_shutdown & RCV_SHUTDOWN) 1197 if (sk->sk_state == SMC_LISTEN) {
1177 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 1198 /* woken up by sk_data_ready in smc_listen_work() */
1178 if (sk->sk_state == SMC_APPCLOSEWAIT1) 1199 mask = smc_accept_poll(sk);
1179 mask |= POLLIN; 1200 } else {
1201 if (atomic_read(&smc->conn.sndbuf_space) ||
1202 sk->sk_shutdown & SEND_SHUTDOWN) {
1203 mask |= EPOLLOUT | EPOLLWRNORM;
1204 } else {
1205 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1206 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1207 }
1208 if (atomic_read(&smc->conn.bytes_to_rcv))
1209 mask |= EPOLLIN | EPOLLRDNORM;
1210 if (sk->sk_shutdown & RCV_SHUTDOWN)
1211 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1212 if (sk->sk_state == SMC_APPCLOSEWAIT1)
1213 mask |= EPOLLIN;
1214 }
1180 1215
1181 } 1216 }
1217 release_sock(sk);
1218 sock_put(sk);
1182 1219
1183 return mask; 1220 return mask;
1184} 1221}
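
The smc_poll() rework returns a typed __poll_t mask built from EPOLL* constants instead of the old untyped POLL* values; the bits it reports are exactly what userspace sees from epoll. A self-contained demo observing those bits on a pipe (standard epoll API, nothing SMC-specific assumed):

	#include <stdio.h>
	#include <sys/epoll.h>
	#include <unistd.h>

	int main(void)
	{
		int fds[2], ep = epoll_create1(0);
		struct epoll_event ev = { .events = EPOLLIN }, out;

		if (ep < 0 || pipe(fds) < 0) return 1;
		ev.data.fd = fds[0];
		epoll_ctl(ep, EPOLL_CTL_ADD, fds[0], &ev);

		write(fds[1], "x", 1);                 /* make the read end readable */
		if (epoll_wait(ep, &out, 1, 1000) == 1) {
			if (out.events & EPOLLIN)  puts("EPOLLIN");   /* data to read */
			if (out.events & EPOLLERR) puts("EPOLLERR");  /* always reported */
			if (out.events & EPOLLHUP) puts("EPOLLHUP");  /* peer closed */
		}
		close(fds[0]); close(fds[1]); close(ep);
		return 0;
	}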
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0bee9d16cf29..9518986c97b1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */
178 struct work_struct smc_listen_work;/* prepare new accept socket */ 178 struct work_struct smc_listen_work;/* prepare new accept socket */
179 struct list_head accept_q; /* sockets to be accepted */ 179 struct list_head accept_q; /* sockets to be accepted */
180 spinlock_t accept_q_lock; /* protects accept_q */ 180 spinlock_t accept_q_lock; /* protects accept_q */
181 struct delayed_work sock_put_work; /* final socket freeing */
182 bool use_fallback; /* fallback to tcp */ 181 bool use_fallback; /* fallback to tcp */
183 u8 wait_close_tx_prepared : 1; 182 u8 wait_close_tx_prepared : 1;
184 /* shutdown wr or close 183 /* shutdown wr or close
@@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed)
253static inline bool using_ipsec(struct smc_sock *smc) 252static inline bool using_ipsec(struct smc_sock *smc)
254{ 253{
255 return (smc->clcsock->sk->sk_policy[0] || 254 return (smc->clcsock->sk->sk_policy[0] ||
256 smc->clcsock->sk->sk_policy[1]) ? 1 : 0; 255 smc->clcsock->sk->sk_policy[1]) ? true : false;
257} 256}
258#else 257#else
259static inline bool using_ipsec(struct smc_sock *smc) 258static inline bool using_ipsec(struct smc_sock *smc)
260{ 259{
261 return 0; 260 return false;
262} 261}
263#endif 262#endif
264 263
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 87f7bede6eab..3cd086e5bd28 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
57 cdcpend->conn); 57 cdcpend->conn);
58 } 58 }
59 smc_tx_sndbuf_nonfull(smc); 59 smc_tx_sndbuf_nonfull(smc);
60 if (smc->sk.sk_state != SMC_ACTIVE)
61 /* wake up smc_close_wait_tx_pends() */
62 smc->sk.sk_state_change(&smc->sk);
63 bh_unlock_sock(&smc->sk); 60 bh_unlock_sock(&smc->sk);
64} 61}
65 62
@@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
68 struct smc_cdc_tx_pend **pend) 65 struct smc_cdc_tx_pend **pend)
69{ 66{
70 struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 67 struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
68 int rc;
71 69
72 return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, 70 rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
73 (struct smc_wr_tx_pend_priv **)pend); 71 (struct smc_wr_tx_pend_priv **)pend);
72 if (!conn->alert_token_local)
73 /* abnormal termination */
74 rc = -EPIPE;
75 return rc;
74} 76}
75 77
76static inline void smc_cdc_add_pending_send(struct smc_connection *conn, 78static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
@@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
155 (unsigned long)conn); 157 (unsigned long)conn);
156} 158}
157 159
158bool smc_cdc_tx_has_pending(struct smc_connection *conn)
159{
160 struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
161
162 return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
163 smc_cdc_tx_filter, (unsigned long)conn);
164}
165
166/********************************* receive ***********************************/ 160/********************************* receive ***********************************/
167 161
168static inline bool smc_cdc_before(u16 seq1, u16 seq2) 162static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -213,6 +207,17 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
213 /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ 207 /* guarantee 0 <= bytes_to_rcv <= rmbe_size */
214 smp_mb__after_atomic(); 208 smp_mb__after_atomic();
215 smc->sk.sk_data_ready(&smc->sk); 209 smc->sk.sk_data_ready(&smc->sk);
210 } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
211 (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) {
212 smc->sk.sk_data_ready(&smc->sk);
213 }
214
215 /* piggy backed tx info */
216 /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
217 if (diff_cons && smc_tx_prepared_sends(conn)) {
218 smc_tx_sndbuf_nonempty(conn);
219 /* trigger socket release if connection closed */
220 smc_close_wake_tx_prepared(smc);
216 } 221 }
217 222
218 if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { 223 if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
@@ -224,25 +229,10 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
224 if (smc->clcsock && smc->clcsock->sk) 229 if (smc->clcsock && smc->clcsock->sk)
225 smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; 230 smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
226 sock_set_flag(&smc->sk, SOCK_DONE); 231 sock_set_flag(&smc->sk, SOCK_DONE);
227 schedule_work(&conn->close_work); 232 sock_hold(&smc->sk); /* sock_put in close_work */
233 if (!schedule_work(&conn->close_work))
234 sock_put(&smc->sk);
228 } 235 }
229
230 /* piggy backed tx info */
231 /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
232 if (diff_cons && smc_tx_prepared_sends(conn)) {
233 smc_tx_sndbuf_nonempty(conn);
234 /* trigger socket release if connection closed */
235 smc_close_wake_tx_prepared(smc);
236 }
237
238 /* socket connected but not accepted */
239 if (!smc->sk.sk_socket)
240 return;
241
242 /* data available */
243 if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
244 (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req))
245 smc_tx_consumer_update(conn);
246} 236}
247 237
248/* called under tasklet context */ 238/* called under tasklet context */
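
The close_work scheduling above shows a subtle handoff rule: take a reference before queueing the work, and drop it immediately if schedule_work() reports the work was already pending (so only one reference is ever handed to the worker). A compact model of that rule, with atomics standing in for the kernel primitives:

	#include <stdatomic.h>
	#include <stdlib.h>

	struct sock { atomic_int refcnt; };

	static void sock_put(struct sock *sk)
	{
		if (atomic_fetch_sub(&sk->refcnt, 1) == 1)
			free(sk);
	}

	static _Atomic int work_queued;

	/* Stand-in for schedule_work(): returns 0 if already queued. */
	static int schedule_close_work(void)
	{
		return !atomic_exchange(&work_queued, 1);
	}

	int main(void)
	{
		struct sock *sk = malloc(sizeof(*sk));

		atomic_init(&sk->refcnt, 1);        /* creation reference */
		atomic_fetch_add(&sk->refcnt, 1);   /* sock_hold before handoff */
		if (!schedule_close_work())
			sock_put(sk);               /* handoff refused: drop it now */
		sock_put(sk);                       /* the worker's put, simulated */
		sock_put(sk);                       /* creation reference */
		return 0;
	}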
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 149ceda1b088..ab240b37ad11 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
214int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, 214int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
215 struct smc_cdc_tx_pend *pend); 215 struct smc_cdc_tx_pend *pend);
216int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); 216int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
217bool smc_cdc_tx_has_pending(struct smc_connection *conn);
218int smc_cdc_init(void) __init; 217int smc_cdc_init(void) __init;
219 218
220#endif /* SMC_CDC_H */ 219#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 1800e16b2a02..8ac51583a063 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -22,6 +22,54 @@
22#include "smc_clc.h" 22#include "smc_clc.h"
23#include "smc_ib.h" 23#include "smc_ib.h"
24 24
25/* check if received message has a correct header length and contains valid
26 * heading and trailing eyecatchers
27 */
28static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
29{
30 struct smc_clc_msg_proposal_prefix *pclc_prfx;
31 struct smc_clc_msg_accept_confirm *clc;
32 struct smc_clc_msg_proposal *pclc;
33 struct smc_clc_msg_decline *dclc;
34 struct smc_clc_msg_trail *trl;
35
36 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
37 return false;
38 switch (clcm->type) {
39 case SMC_CLC_PROPOSAL:
40 pclc = (struct smc_clc_msg_proposal *)clcm;
41 pclc_prfx = smc_clc_proposal_get_prefix(pclc);
42 if (ntohs(pclc->hdr.length) !=
43 sizeof(*pclc) + ntohs(pclc->iparea_offset) +
44 sizeof(*pclc_prfx) +
45 pclc_prfx->ipv6_prefixes_cnt *
46 sizeof(struct smc_clc_ipv6_prefix) +
47 sizeof(*trl))
48 return false;
49 trl = (struct smc_clc_msg_trail *)
50 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
51 break;
52 case SMC_CLC_ACCEPT:
53 case SMC_CLC_CONFIRM:
54 clc = (struct smc_clc_msg_accept_confirm *)clcm;
55 if (ntohs(clc->hdr.length) != sizeof(*clc))
56 return false;
57 trl = &clc->trl;
58 break;
59 case SMC_CLC_DECLINE:
60 dclc = (struct smc_clc_msg_decline *)clcm;
61 if (ntohs(dclc->hdr.length) != sizeof(*dclc))
62 return false;
63 trl = &dclc->trl;
64 break;
65 default:
66 return false;
67 }
68 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
69 return false;
70 return true;
71}
72
25/* Wait for data on the tcp-socket, analyze received data 73/* Wait for data on the tcp-socket, analyze received data
26 * Returns: 74 * Returns:
27 * 0 if success and it was not a decline that we received. 75 * 0 if success and it was not a decline that we received.
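
smc_clc_msg_hdr_valid() accepts a message only if the heading eyecatcher, the per-type declared length, and the trailing eyecatcher all check out. A toy validator in the same spirit; the layout here (4-byte eyecatcher, 2-byte length, 4-byte trailer) is invented for the demo and is not the on-wire CLC format:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define EYE "SMCR"

	static int msg_valid(const uint8_t *buf, size_t len)
	{
		uint16_t declared;

		if (len < 4 + 2 + 4)
			return 0;
		memcpy(&declared, buf + 4, 2);          /* length field after header eye */
		if (declared != len)
			return 0;                       /* declared length must match */
		return !memcmp(buf, EYE, 4) &&          /* heading eyecatcher */
		       !memcmp(buf + len - 4, EYE, 4);  /* trailing eyecatcher */
	}

	int main(void)
	{
		uint8_t msg[10] = "SMCR";
		uint16_t l = sizeof(msg);

		memcpy(msg + 4, &l, 2);
		memcpy(msg + 6, EYE, 4);
		printf("valid: %d\n", msg_valid(msg, sizeof(msg)));   /* 1 */
		return 0;
	}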
@@ -35,7 +83,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
35 struct smc_clc_msg_hdr *clcm = buf; 83 struct smc_clc_msg_hdr *clcm = buf;
36 struct msghdr msg = {NULL, 0}; 84 struct msghdr msg = {NULL, 0};
37 int reason_code = 0; 85 int reason_code = 0;
38 struct kvec vec; 86 struct kvec vec = {buf, buflen};
39 int len, datlen; 87 int len, datlen;
40 int krflags; 88 int krflags;
41 89
@@ -43,12 +91,15 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
43 * so we don't consume any subsequent CLC message or payload data 91 * so we don't consume any subsequent CLC message or payload data
44 * in the TCP byte stream 92 * in the TCP byte stream
45 */ 93 */
46 vec.iov_base = buf; 94 /*
47 vec.iov_len = buflen; 95 * Caller must make sure that buflen is no less than
96 * sizeof(struct smc_clc_msg_hdr)
97 */
48 krflags = MSG_PEEK | MSG_WAITALL; 98 krflags = MSG_PEEK | MSG_WAITALL;
49 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 99 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
50 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, 100 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
51 sizeof(struct smc_clc_msg_hdr), krflags); 101 sizeof(struct smc_clc_msg_hdr));
102 len = sock_recvmsg(smc->clcsock, &msg, krflags);
52 if (signal_pending(current)) { 103 if (signal_pending(current)) {
53 reason_code = -EINTR; 104 reason_code = -EINTR;
54 clc_sk->sk_err = EINTR; 105 clc_sk->sk_err = EINTR;
@@ -72,9 +123,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
72 } 123 }
73 datlen = ntohs(clcm->length); 124 datlen = ntohs(clcm->length);
74 if ((len < sizeof(struct smc_clc_msg_hdr)) || 125 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
75 (datlen < sizeof(struct smc_clc_msg_decline)) || 126 (datlen > buflen) ||
76 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
77 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
78 ((clcm->type != SMC_CLC_DECLINE) && 127 ((clcm->type != SMC_CLC_DECLINE) &&
79 (clcm->type != expected_type))) { 128 (clcm->type != expected_type))) {
80 smc->sk.sk_err = EPROTO; 129 smc->sk.sk_err = EPROTO;
@@ -83,13 +132,12 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
83 } 132 }
84 133
85 /* receive the complete CLC message */ 134 /* receive the complete CLC message */
86 vec.iov_base = buf;
87 vec.iov_len = buflen;
88 memset(&msg, 0, sizeof(struct msghdr)); 135 memset(&msg, 0, sizeof(struct msghdr));
136 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
89 krflags = MSG_WAITALL; 137 krflags = MSG_WAITALL;
90 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 138 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
91 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); 139 len = sock_recvmsg(smc->clcsock, &msg, krflags);
92 if (len < datlen) { 140 if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
93 smc->sk.sk_err = EPROTO; 141 smc->sk.sk_err = EPROTO;
94 reason_code = -EPROTO; 142 reason_code = -EPROTO;
95 goto out; 143 goto out;
@@ -133,7 +181,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
133 smc->sk.sk_err = EPROTO; 181 smc->sk.sk_err = EPROTO;
134 if (len < 0) 182 if (len < 0)
135 smc->sk.sk_err = -len; 183 smc->sk.sk_err = -len;
136 return len; 184 return sock_error(&smc->sk);
137} 185}
138 186
139/* send CLC PROPOSAL message across internal TCP socket */ 187/* send CLC PROPOSAL message across internal TCP socket */
@@ -141,33 +189,43 @@ int smc_clc_send_proposal(struct smc_sock *smc,
141 struct smc_ib_device *smcibdev, 189 struct smc_ib_device *smcibdev,
142 u8 ibport) 190 u8 ibport)
143{ 191{
192 struct smc_clc_msg_proposal_prefix pclc_prfx;
144 struct smc_clc_msg_proposal pclc; 193 struct smc_clc_msg_proposal pclc;
194 struct smc_clc_msg_trail trl;
145 int reason_code = 0; 195 int reason_code = 0;
196 struct kvec vec[3];
146 struct msghdr msg; 197 struct msghdr msg;
147 struct kvec vec; 198 int len, plen, rc;
148 int len, rc;
149 199
150 /* send SMC Proposal CLC message */ 200 /* send SMC Proposal CLC message */
201 plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
151 memset(&pclc, 0, sizeof(pclc)); 202 memset(&pclc, 0, sizeof(pclc));
152 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 203 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
153 pclc.hdr.type = SMC_CLC_PROPOSAL; 204 pclc.hdr.type = SMC_CLC_PROPOSAL;
154 pclc.hdr.length = htons(sizeof(pclc)); 205 pclc.hdr.length = htons(plen);
155 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 206 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
156 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 207 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
157 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 208 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
158 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 209 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
210 pclc.iparea_offset = htons(0);
159 211
212 memset(&pclc_prfx, 0, sizeof(pclc_prfx));
160 /* determine subnet and mask from internal TCP socket */ 213 /* determine subnet and mask from internal TCP socket */
161 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, 214 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
162 &pclc.prefix_len); 215 &pclc_prfx.prefix_len);
163 if (rc) 216 if (rc)
164 return SMC_CLC_DECL_CNFERR; /* configuration error */ 217 return SMC_CLC_DECL_CNFERR; /* configuration error */
165 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 218 pclc_prfx.ipv6_prefixes_cnt = 0;
219 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
166 memset(&msg, 0, sizeof(msg)); 220 memset(&msg, 0, sizeof(msg));
167 vec.iov_base = &pclc; 221 vec[0].iov_base = &pclc;
168 vec.iov_len = sizeof(pclc); 222 vec[0].iov_len = sizeof(pclc);
223 vec[1].iov_base = &pclc_prfx;
224 vec[1].iov_len = sizeof(pclc_prfx);
225 vec[2].iov_base = &trl;
226 vec[2].iov_len = sizeof(trl);
169 /* due to the few bytes needed for clc-handshake this cannot block */ 227 /* due to the few bytes needed for clc-handshake this cannot block */
170 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); 228 len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
171 if (len < sizeof(pclc)) { 229 if (len < sizeof(pclc)) {
172 if (len >= 0) { 230 if (len >= 0) {
173 reason_code = -ENETUNREACH; 231 reason_code = -ENETUNREACH;
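
Splitting the proposal into header, prefix part, and trailer lets the sender gather three separate structs into one message with a single three-element kvec, instead of staging them in a contiguous buffer. The userspace equivalent is writev(); a runnable sketch of the same gather-send shape:

	#include <stdio.h>
	#include <string.h>
	#include <sys/uio.h>
	#include <unistd.h>

	/* Header + prefix + trailer as three iovecs, the same shape as the
	 * kernel_sendmsg(..., vec, 3, plen) call above. */
	int main(void)
	{
		const char hdr[] = "HDR ", pfx[] = "PREFIX ", trl[] = "TRL\n";
		struct iovec vec[3] = {
			{ (void *)hdr, strlen(hdr) },
			{ (void *)pfx, strlen(pfx) },
			{ (void *)trl, strlen(trl) },
		};
		ssize_t n = writev(STDOUT_FILENO, vec, 3);

		return n < 0;   /* one syscall, three buffers, no staging copy */
	}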
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 12a9af1539a2..c145a0f36a68 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
44#if defined(__BIG_ENDIAN_BITFIELD) 44#if defined(__BIG_ENDIAN_BITFIELD)
45 u8 version : 4, 45 u8 version : 4,
46 flag : 1, 46 flag : 1,
47 rsvd : 3; 47 rsvd : 3;
48#elif defined(__LITTLE_ENDIAN_BITFIELD) 48#elif defined(__LITTLE_ENDIAN_BITFIELD)
49 u8 rsvd : 3, 49 u8 rsvd : 3,
50 flag : 1, 50 flag : 1,
@@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */
62 u8 mac[6]; /* mac of ib_device port */ 62 u8 mac[6]; /* mac of ib_device port */
63}; 63};
64 64
65struct smc_clc_msg_proposal { /* clc proposal message */ 65struct smc_clc_ipv6_prefix {
66 struct smc_clc_msg_hdr hdr; 66 u8 prefix[4];
67 struct smc_clc_msg_local lcl; 67 u8 prefix_len;
68 __be16 iparea_offset; /* offset to IP address information area */ 68} __packed;
69
70struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
69 __be32 outgoing_subnet; /* subnet mask */ 71 __be32 outgoing_subnet; /* subnet mask */
70 u8 prefix_len; /* number of significant bits in mask */ 72 u8 prefix_len; /* number of significant bits in mask */
71 u8 reserved[2]; 73 u8 reserved[2];
72 u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ 74 u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
73 struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
74} __aligned(4); 75} __aligned(4);
75 76
77struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
78 struct smc_clc_msg_hdr hdr;
79 struct smc_clc_msg_local lcl;
80 __be16 iparea_offset; /* offset to IP address information area */
81} __aligned(4);
82
83#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
84#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
85#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
86 SMC_CLC_PROPOSAL_MAX_OFFSET + \
87 SMC_CLC_PROPOSAL_MAX_PREFIX + \
88 sizeof(struct smc_clc_msg_trail))
89
76struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ 90struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
77 struct smc_clc_msg_hdr hdr; 91 struct smc_clc_msg_hdr hdr;
78 struct smc_clc_msg_local lcl; 92 struct smc_clc_msg_local lcl;
@@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */
102 struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ 116 struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
103} __aligned(4); 117} __aligned(4);
104 118
119/* determine start of the prefix area within the proposal message */
120static inline struct smc_clc_msg_proposal_prefix *
121smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
122{
123 return (struct smc_clc_msg_proposal_prefix *)
124 ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
125}
126
105struct smc_sock; 127struct smc_sock;
106struct smc_ib_device; 128struct smc_ib_device;
107 129
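
smc_clc_proposal_get_prefix() locates the prefix area through the iparea_offset carried in the fixed header, so a peer may place it beyond Linux's own layout (up to SMC_CLC_PROPOSAL_MAX_OFFSET). A cut-down sketch of that variable-offset accessor; the structs keep only the fields the demo needs:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	struct proposal_hdr {
		uint16_t iparea_offset;   /* network byte order, as in the CLC header */
	};

	struct prefix_part {
		uint32_t outgoing_subnet;
		uint8_t  prefix_len;
	};

	static struct prefix_part *get_prefix(struct proposal_hdr *p)
	{
		return (struct prefix_part *)
			((uint8_t *)p + sizeof(*p) + ntohs(p->iparea_offset));
	}

	int main(void)
	{
		static union { uint8_t bytes[64]; uint32_t align; } buf;
		struct proposal_hdr *hdr = (struct proposal_hdr *)buf.bytes;
		struct prefix_part *prfx;

		hdr->iparea_offset = htons(6);   /* 6 skipped bytes keeps the area 4-aligned */
		prfx = get_prefix(hdr);
		prfx->prefix_len = 24;
		printf("prefix area at offset %d, prefix_len %u\n",
		       (int)((uint8_t *)prfx - buf.bytes), prfx->prefix_len);
		return 0;
	}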
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 48615d2ac4aa..e339c0186dcf 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -19,7 +19,7 @@
19#include "smc_cdc.h" 19#include "smc_cdc.h"
20#include "smc_close.h" 20#include "smc_close.h"
21 21
22#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ) 22#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
23 23
24static void smc_close_cleanup_listen(struct sock *parent) 24static void smc_close_cleanup_listen(struct sock *parent)
25{ 25{
@@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent)
30 smc_close_non_accepted(sk); 30 smc_close_non_accepted(sk);
31} 31}
32 32
33static void smc_close_wait_tx_pends(struct smc_sock *smc) 33static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
34{ 34{
35 DEFINE_WAIT_FUNC(wait, woken_wake_function); 35 DEFINE_WAIT_FUNC(wait, woken_wake_function);
36 struct sock *sk = &smc->sk; 36 struct sock *sk = &smc->sk;
37 signed long timeout; 37 signed long timeout;
38 38
39 timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; 39 timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
40 add_wait_queue(sk_sleep(sk), &wait); 40 add_wait_queue(sk_sleep(sk), &wait);
41 while (!signal_pending(current) && timeout) { 41 do {
42 int rc; 42 release_sock(sk);
43 43 if (smc->clcsock)
44 rc = sk_wait_event(sk, &timeout, 44 timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
45 !smc_cdc_tx_has_pending(&smc->conn), 45 timeout);
46 &wait); 46 sched_annotate_sleep();
47 if (rc) 47 lock_sock(sk);
48 if (!smc->clcsock)
48 break; 49 break;
49 } 50 } while (timeout);
50 remove_wait_queue(sk_sleep(sk), &wait); 51 remove_wait_queue(sk_sleep(sk), &wait);
51} 52}
52 53
@@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn)
111} 112}
112 113
113/* terminate smc socket abnormally - active abort 114/* terminate smc socket abnormally - active abort
114 * RDMA communication no longer possible 115 * link group is terminated, i.e. RDMA communication no longer possible
115 */ 116 */
116void smc_close_active_abort(struct smc_sock *smc) 117static void smc_close_active_abort(struct smc_sock *smc)
117{ 118{
119 struct sock *sk = &smc->sk;
120
118 struct smc_cdc_conn_state_flags *txflags = 121 struct smc_cdc_conn_state_flags *txflags =
119 &smc->conn.local_tx_ctrl.conn_state_flags; 122 &smc->conn.local_tx_ctrl.conn_state_flags;
120 123
121 smc->sk.sk_err = ECONNABORTED; 124 sk->sk_err = ECONNABORTED;
122 if (smc->clcsock && smc->clcsock->sk) { 125 if (smc->clcsock && smc->clcsock->sk) {
123 smc->clcsock->sk->sk_err = ECONNABORTED; 126 smc->clcsock->sk->sk_err = ECONNABORTED;
124 smc->clcsock->sk->sk_state_change(smc->clcsock->sk); 127 smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
125 } 128 }
126 switch (smc->sk.sk_state) { 129 switch (sk->sk_state) {
127 case SMC_INIT: 130 case SMC_INIT:
128 case SMC_ACTIVE: 131 case SMC_ACTIVE:
129 smc->sk.sk_state = SMC_PEERABORTWAIT; 132 sk->sk_state = SMC_PEERABORTWAIT;
133 release_sock(sk);
134 cancel_delayed_work_sync(&smc->conn.tx_work);
135 lock_sock(sk);
136 sock_put(sk); /* passive closing */
130 break; 137 break;
131 case SMC_APPCLOSEWAIT1: 138 case SMC_APPCLOSEWAIT1:
132 case SMC_APPCLOSEWAIT2: 139 case SMC_APPCLOSEWAIT2:
133 txflags->peer_conn_abort = 1;
134 sock_release(smc->clcsock);
135 if (!smc_cdc_rxed_any_close(&smc->conn)) 140 if (!smc_cdc_rxed_any_close(&smc->conn))
136 smc->sk.sk_state = SMC_PEERABORTWAIT; 141 sk->sk_state = SMC_PEERABORTWAIT;
137 else 142 else
138 smc->sk.sk_state = SMC_CLOSED; 143 sk->sk_state = SMC_CLOSED;
144 release_sock(sk);
145 cancel_delayed_work_sync(&smc->conn.tx_work);
146 lock_sock(sk);
139 break; 147 break;
140 case SMC_PEERCLOSEWAIT1: 148 case SMC_PEERCLOSEWAIT1:
141 case SMC_PEERCLOSEWAIT2: 149 case SMC_PEERCLOSEWAIT2:
142 if (!txflags->peer_conn_closed) { 150 if (!txflags->peer_conn_closed) {
143 smc->sk.sk_state = SMC_PEERABORTWAIT; 151 /* just SHUTDOWN_SEND done */
144 txflags->peer_conn_abort = 1; 152 sk->sk_state = SMC_PEERABORTWAIT;
145 sock_release(smc->clcsock);
146 } else { 153 } else {
147 smc->sk.sk_state = SMC_CLOSED; 154 sk->sk_state = SMC_CLOSED;
148 } 155 }
156 sock_put(sk); /* passive closing */
149 break; 157 break;
150 case SMC_PROCESSABORT: 158 case SMC_PROCESSABORT:
151 case SMC_APPFINCLOSEWAIT: 159 case SMC_APPFINCLOSEWAIT:
152 if (!txflags->peer_conn_closed) { 160 sk->sk_state = SMC_CLOSED;
153 txflags->peer_conn_abort = 1;
154 sock_release(smc->clcsock);
155 }
156 smc->sk.sk_state = SMC_CLOSED;
157 break; 161 break;
158 case SMC_PEERFINCLOSEWAIT: 162 case SMC_PEERFINCLOSEWAIT:
163 sock_put(sk); /* passive closing */
164 break;
159 case SMC_PEERABORTWAIT: 165 case SMC_PEERABORTWAIT:
160 case SMC_CLOSED: 166 case SMC_CLOSED:
161 break; 167 break;
162 } 168 }
163 169
164 sock_set_flag(&smc->sk, SOCK_DEAD); 170 sock_set_flag(sk, SOCK_DEAD);
165 smc->sk.sk_state_change(&smc->sk); 171 sk->sk_state_change(sk);
166} 172}
167 173
168static inline bool smc_close_sent_any_close(struct smc_connection *conn) 174static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc)
185 0 : sock_flag(sk, SOCK_LINGER) ? 191 0 : sock_flag(sk, SOCK_LINGER) ?
186 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 192 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
187 193
188again:
189 old_state = sk->sk_state; 194 old_state = sk->sk_state;
190 switch (old_state) { 195again:
196 switch (sk->sk_state) {
191 case SMC_INIT: 197 case SMC_INIT:
192 sk->sk_state = SMC_CLOSED; 198 sk->sk_state = SMC_CLOSED;
193 if (smc->smc_listen_work.func)
194 cancel_work_sync(&smc->smc_listen_work);
195 break; 199 break;
196 case SMC_LISTEN: 200 case SMC_LISTEN:
197 sk->sk_state = SMC_CLOSED; 201 sk->sk_state = SMC_CLOSED;
@@ -200,11 +204,9 @@ again:
200 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 204 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
201 /* wake up kernel_accept of smc_tcp_listen_worker */ 205 /* wake up kernel_accept of smc_tcp_listen_worker */
202 smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); 206 smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
207 smc_close_wait_listen_clcsock(smc);
203 } 208 }
204 release_sock(sk);
205 smc_close_cleanup_listen(sk); 209 smc_close_cleanup_listen(sk);
206 cancel_work_sync(&smc->smc_listen_work);
207 lock_sock(sk);
208 break; 210 break;
209 case SMC_ACTIVE: 211 case SMC_ACTIVE:
210 smc_close_stream_wait(smc, timeout); 212 smc_close_stream_wait(smc, timeout);
@@ -214,6 +216,8 @@ again:
214 if (sk->sk_state == SMC_ACTIVE) { 216 if (sk->sk_state == SMC_ACTIVE) {
215 /* send close request */ 217 /* send close request */
216 rc = smc_close_final(conn); 218 rc = smc_close_final(conn);
219 if (rc)
220 break;
217 sk->sk_state = SMC_PEERCLOSEWAIT1; 221 sk->sk_state = SMC_PEERCLOSEWAIT1;
218 } else { 222 } else {
219 /* peer event has changed the state */ 223 /* peer event has changed the state */
@@ -226,9 +230,10 @@ again:
226 !smc_close_sent_any_close(conn)) { 230 !smc_close_sent_any_close(conn)) {
227 /* just shutdown wr done, send close request */ 231 /* just shutdown wr done, send close request */
228 rc = smc_close_final(conn); 232 rc = smc_close_final(conn);
233 if (rc)
234 break;
229 } 235 }
230 sk->sk_state = SMC_CLOSED; 236 sk->sk_state = SMC_CLOSED;
231 smc_close_wait_tx_pends(smc);
232 break; 237 break;
233 case SMC_APPCLOSEWAIT1: 238 case SMC_APPCLOSEWAIT1:
234 case SMC_APPCLOSEWAIT2: 239 case SMC_APPCLOSEWAIT2:
@@ -237,19 +242,21 @@ again:
237 release_sock(sk); 242 release_sock(sk);
238 cancel_delayed_work_sync(&conn->tx_work); 243 cancel_delayed_work_sync(&conn->tx_work);
239 lock_sock(sk); 244 lock_sock(sk);
240 if (sk->sk_err != ECONNABORTED) { 245 if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
241 /* confirm close from peer */ 246 sk->sk_state != SMC_APPCLOSEWAIT2)
242 rc = smc_close_final(conn); 247 goto again;
243 if (rc) 248 /* confirm close from peer */
244 break; 249 rc = smc_close_final(conn);
245 } 250 if (rc)
246 if (smc_cdc_rxed_any_close(conn)) 251 break;
252 if (smc_cdc_rxed_any_close(conn)) {
247 /* peer has closed the socket already */ 253 /* peer has closed the socket already */
248 sk->sk_state = SMC_CLOSED; 254 sk->sk_state = SMC_CLOSED;
249 else 255 sock_put(sk); /* postponed passive closing */
256 } else {
250 /* peer has just issued a shutdown write */ 257 /* peer has just issued a shutdown write */
251 sk->sk_state = SMC_PEERFINCLOSEWAIT; 258 sk->sk_state = SMC_PEERFINCLOSEWAIT;
252 smc_close_wait_tx_pends(smc); 259 }
253 break; 260 break;
254 case SMC_PEERCLOSEWAIT1: 261 case SMC_PEERCLOSEWAIT1:
255 case SMC_PEERCLOSEWAIT2: 262 case SMC_PEERCLOSEWAIT2:
@@ -257,6 +264,8 @@ again:
257 !smc_close_sent_any_close(conn)) { 264 !smc_close_sent_any_close(conn)) {
258 /* just shutdown wr done, send close request */ 265 /* just shutdown wr done, send close request */
259 rc = smc_close_final(conn); 266 rc = smc_close_final(conn);
267 if (rc)
268 break;
260 } 269 }
261 /* peer sending PeerConnectionClosed will cause transition */ 270 /* peer sending PeerConnectionClosed will cause transition */
262 break; 271 break;
@@ -264,12 +273,8 @@ again:
264 /* peer sending PeerConnectionClosed will cause transition */ 273 /* peer sending PeerConnectionClosed will cause transition */
265 break; 274 break;
266 case SMC_PROCESSABORT: 275 case SMC_PROCESSABORT:
267 release_sock(sk);
268 cancel_delayed_work_sync(&conn->tx_work);
269 lock_sock(sk);
270 smc_close_abort(conn); 276 smc_close_abort(conn);
271 sk->sk_state = SMC_CLOSED; 277 sk->sk_state = SMC_CLOSED;
272 smc_close_wait_tx_pends(smc);
273 break; 278 break;
274 case SMC_PEERABORTWAIT: 279 case SMC_PEERABORTWAIT:
275 case SMC_CLOSED: 280 case SMC_CLOSED:
@@ -278,7 +283,7 @@ again:
278 } 283 }
279 284
280 if (old_state != sk->sk_state) 285 if (old_state != sk->sk_state)
281 sk->sk_state_change(&smc->sk); 286 sk->sk_state_change(sk);
282 return rc; 287 return rc;
283} 288}
284 289
@@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
289 struct sock *sk = &smc->sk; 294 struct sock *sk = &smc->sk;
290 295
291 switch (sk->sk_state) { 296 switch (sk->sk_state) {
297 case SMC_INIT:
292 case SMC_ACTIVE: 298 case SMC_ACTIVE:
293 case SMC_APPFINCLOSEWAIT:
294 case SMC_APPCLOSEWAIT1: 299 case SMC_APPCLOSEWAIT1:
295 case SMC_APPCLOSEWAIT2: 300 sk->sk_state = SMC_PROCESSABORT;
296 smc_close_abort(&smc->conn); 301 sock_put(sk); /* passive closing */
302 break;
303 case SMC_APPFINCLOSEWAIT:
297 sk->sk_state = SMC_PROCESSABORT; 304 sk->sk_state = SMC_PROCESSABORT;
298 break; 305 break;
299 case SMC_PEERCLOSEWAIT1: 306 case SMC_PEERCLOSEWAIT1:
300 case SMC_PEERCLOSEWAIT2: 307 case SMC_PEERCLOSEWAIT2:
301 if (txflags->peer_done_writing && 308 if (txflags->peer_done_writing &&
302 !smc_close_sent_any_close(&smc->conn)) { 309 !smc_close_sent_any_close(&smc->conn))
303 /* just shutdown, but not yet closed locally */ 310 /* just shutdown, but not yet closed locally */
304 smc_close_abort(&smc->conn);
305 sk->sk_state = SMC_PROCESSABORT; 311 sk->sk_state = SMC_PROCESSABORT;
306 } else { 312 else
307 sk->sk_state = SMC_CLOSED; 313 sk->sk_state = SMC_CLOSED;
308 } 314 sock_put(sk); /* passive closing */
309 break; 315 break;
316 case SMC_APPCLOSEWAIT2:
310 case SMC_PEERFINCLOSEWAIT: 317 case SMC_PEERFINCLOSEWAIT:
318 sk->sk_state = SMC_CLOSED;
319 sock_put(sk); /* passive closing */
320 break;
311 case SMC_PEERABORTWAIT: 321 case SMC_PEERABORTWAIT:
312 sk->sk_state = SMC_CLOSED; 322 sk->sk_state = SMC_CLOSED;
313 break; 323 break;
314 case SMC_INIT:
315 case SMC_PROCESSABORT: 324 case SMC_PROCESSABORT:
316 /* nothing to do, add tracing in future patch */ 325 /* nothing to do, add tracing in future patch */
317 break; 326 break;
318 } 327 }
319} 328}
320 329
321/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, 330/* Either some kind of closing has been received: peer_conn_closed,
322 * or peer_done_writing. 331 * peer_conn_abort, or peer_done_writing
332 * or the link group of the connection terminates abnormally.
323 */ 333 */
324static void smc_close_passive_work(struct work_struct *work) 334static void smc_close_passive_work(struct work_struct *work)
325{ 335{
@@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work)
331 struct sock *sk = &smc->sk; 341 struct sock *sk = &smc->sk;
332 int old_state; 342 int old_state;
333 343
334 lock_sock(&smc->sk); 344 lock_sock(sk);
335 old_state = sk->sk_state; 345 old_state = sk->sk_state;
336 346
337 if (!conn->alert_token_local) { 347 if (!conn->alert_token_local) {
@@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work)
340 goto wakeup; 350 goto wakeup;
341 } 351 }
342 352
343 rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; 353 rxflags = &conn->local_rx_ctrl.conn_state_flags;
344 if (rxflags->peer_conn_abort) { 354 if (rxflags->peer_conn_abort) {
355 /* peer has not received all data */
345 smc_close_passive_abort_received(smc); 356 smc_close_passive_abort_received(smc);
357 release_sock(&smc->sk);
358 cancel_delayed_work_sync(&conn->tx_work);
359 lock_sock(&smc->sk);
346 goto wakeup; 360 goto wakeup;
347 } 361 }
348 362
349 switch (sk->sk_state) { 363 switch (sk->sk_state) {
350 case SMC_INIT: 364 case SMC_INIT:
351 if (atomic_read(&smc->conn.bytes_to_rcv) || 365 if (atomic_read(&conn->bytes_to_rcv) ||
352 (rxflags->peer_done_writing && 366 (rxflags->peer_done_writing &&
353 !smc_cdc_rxed_any_close(conn))) 367 !smc_cdc_rxed_any_close(conn))) {
354 sk->sk_state = SMC_APPCLOSEWAIT1; 368 sk->sk_state = SMC_APPCLOSEWAIT1;
355 else 369 } else {
356 sk->sk_state = SMC_CLOSED; 370 sk->sk_state = SMC_CLOSED;
371 sock_put(sk); /* passive closing */
372 }
357 break; 373 break;
358 case SMC_ACTIVE: 374 case SMC_ACTIVE:
359 sk->sk_state = SMC_APPCLOSEWAIT1; 375 sk->sk_state = SMC_APPCLOSEWAIT1;
376 /* postpone sock_put() for passive closing to cover
377 * received SEND_SHUTDOWN as well
378 */
360 break; 379 break;
361 case SMC_PEERCLOSEWAIT1: 380 case SMC_PEERCLOSEWAIT1:
362 if (rxflags->peer_done_writing) 381 if (rxflags->peer_done_writing)
@@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work)
364 /* fall through */ 383 /* fall through */
365 /* to check for closing */ 384 /* to check for closing */
366 case SMC_PEERCLOSEWAIT2: 385 case SMC_PEERCLOSEWAIT2:
367 case SMC_PEERFINCLOSEWAIT: 386 if (!smc_cdc_rxed_any_close(conn))
368 if (!smc_cdc_rxed_any_close(&smc->conn))
369 break; 387 break;
370 if (sock_flag(sk, SOCK_DEAD) && 388 if (sock_flag(sk, SOCK_DEAD) &&
371 smc_close_sent_any_close(conn)) { 389 smc_close_sent_any_close(conn)) {
@@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work)
375 /* just shutdown, but not yet closed locally */ 393 /* just shutdown, but not yet closed locally */
376 sk->sk_state = SMC_APPFINCLOSEWAIT; 394 sk->sk_state = SMC_APPFINCLOSEWAIT;
377 } 395 }
396 sock_put(sk); /* passive closing */
397 break;
398 case SMC_PEERFINCLOSEWAIT:
399 if (smc_cdc_rxed_any_close(conn)) {
400 sk->sk_state = SMC_CLOSED;
401 sock_put(sk); /* passive closing */
402 }
378 break; 403 break;
379 case SMC_APPCLOSEWAIT1: 404 case SMC_APPCLOSEWAIT1:
380 case SMC_APPCLOSEWAIT2: 405 case SMC_APPCLOSEWAIT2:
406 /* postpone sock_put() for passive closing to cover
407 * received SEND_SHUTDOWN as well
408 */
409 break;
381 case SMC_APPFINCLOSEWAIT: 410 case SMC_APPFINCLOSEWAIT:
382 case SMC_PEERABORTWAIT: 411 case SMC_PEERABORTWAIT:
383 case SMC_PROCESSABORT: 412 case SMC_PROCESSABORT:
@@ -393,23 +422,11 @@ wakeup:
393 if (old_state != sk->sk_state) { 422 if (old_state != sk->sk_state) {
394 sk->sk_state_change(sk); 423 sk->sk_state_change(sk);
395 if ((sk->sk_state == SMC_CLOSED) && 424 if ((sk->sk_state == SMC_CLOSED) &&
396 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 425 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
397 smc_conn_free(&smc->conn); 426 smc_conn_free(conn);
398 schedule_delayed_work(&smc->sock_put_work,
399 SMC_CLOSE_SOCK_PUT_DELAY);
400 }
401 } 427 }
402 release_sock(&smc->sk); 428 release_sock(sk);
403} 429 sock_put(sk); /* sock_hold done by schedulers of close_work */
404
405void smc_close_sock_put_work(struct work_struct *work)
406{
407 struct smc_sock *smc = container_of(to_delayed_work(work),
408 struct smc_sock,
409 sock_put_work);
410
411 smc->sk.sk_prot->unhash(&smc->sk);
412 sock_put(&smc->sk);
413} 430}
414 431
415int smc_close_shutdown_write(struct smc_sock *smc) 432int smc_close_shutdown_write(struct smc_sock *smc)
@@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc)
424 0 : sock_flag(sk, SOCK_LINGER) ? 441 0 : sock_flag(sk, SOCK_LINGER) ?
425 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 442 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
426 443
427again:
428 old_state = sk->sk_state; 444 old_state = sk->sk_state;
429 switch (old_state) { 445again:
446 switch (sk->sk_state) {
430 case SMC_ACTIVE: 447 case SMC_ACTIVE:
431 smc_close_stream_wait(smc, timeout); 448 smc_close_stream_wait(smc, timeout);
432 release_sock(sk); 449 release_sock(sk);
433 cancel_delayed_work_sync(&conn->tx_work); 450 cancel_delayed_work_sync(&conn->tx_work);
434 lock_sock(sk); 451 lock_sock(sk);
452 if (sk->sk_state != SMC_ACTIVE)
453 goto again;
435 /* send close wr request */ 454 /* send close wr request */
436 rc = smc_close_wr(conn); 455 rc = smc_close_wr(conn);
437 if (sk->sk_state == SMC_ACTIVE) 456 if (rc)
438 sk->sk_state = SMC_PEERCLOSEWAIT1; 457 break;
439 else 458 sk->sk_state = SMC_PEERCLOSEWAIT1;
440 goto again;
441 break; 459 break;
442 case SMC_APPCLOSEWAIT1: 460 case SMC_APPCLOSEWAIT1:
443 /* passive close */ 461 /* passive close */
@@ -446,8 +464,12 @@ again:
446 release_sock(sk); 464 release_sock(sk);
447 cancel_delayed_work_sync(&conn->tx_work); 465 cancel_delayed_work_sync(&conn->tx_work);
448 lock_sock(sk); 466 lock_sock(sk);
467 if (sk->sk_state != SMC_APPCLOSEWAIT1)
468 goto again;
449 /* confirm close from peer */ 469 /* confirm close from peer */
450 rc = smc_close_wr(conn); 470 rc = smc_close_wr(conn);
471 if (rc)
472 break;
451 sk->sk_state = SMC_APPCLOSEWAIT2; 473 sk->sk_state = SMC_APPCLOSEWAIT2;
452 break; 474 break;
453 case SMC_APPCLOSEWAIT2: 475 case SMC_APPCLOSEWAIT2:
@@ -462,7 +484,7 @@ again:
462 } 484 }
463 485
464 if (old_state != sk->sk_state) 486 if (old_state != sk->sk_state)
465 sk->sk_state_change(&smc->sk); 487 sk->sk_state_change(sk);
466 return rc; 488 return rc;
467} 489}
468 490
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index ed82506b1b0a..19eb6a211c23 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -20,9 +20,7 @@
20#define SMC_CLOSE_SOCK_PUT_DELAY HZ 20#define SMC_CLOSE_SOCK_PUT_DELAY HZ
21 21
22void smc_close_wake_tx_prepared(struct smc_sock *smc); 22void smc_close_wake_tx_prepared(struct smc_sock *smc);
23void smc_close_active_abort(struct smc_sock *smc);
24int smc_close_active(struct smc_sock *smc); 23int smc_close_active(struct smc_sock *smc);
25void smc_close_sock_put_work(struct work_struct *work);
26int smc_close_shutdown_write(struct smc_sock *smc); 24int smc_close_shutdown_write(struct smc_sock *smc);
27void smc_close_init(struct smc_sock *smc); 25void smc_close_init(struct smc_sock *smc);
28 26
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f21116dac5..2424c7100aaf 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
128 bool conns; 128 bool conns;
129 129
130 spin_lock_bh(&smc_lgr_list.lock); 130 spin_lock_bh(&smc_lgr_list.lock);
131 if (list_empty(&lgr->list))
132 goto free;
131 read_lock_bh(&lgr->conns_lock); 133 read_lock_bh(&lgr->conns_lock);
132 conns = RB_EMPTY_ROOT(&lgr->conns_all); 134 conns = RB_EMPTY_ROOT(&lgr->conns_all);
133 read_unlock_bh(&lgr->conns_lock); 135 read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
136 return; 138 return;
137 } 139 }
138 list_del_init(&lgr->list); /* remove from smc_lgr_list */ 140 list_del_init(&lgr->list); /* remove from smc_lgr_list */
141free:
139 spin_unlock_bh(&smc_lgr_list.lock); 142 spin_unlock_bh(&smc_lgr_list.lock);
140 smc_lgr_free(lgr); 143 smc_lgr_free(lgr);
141} 144}
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
231/* remove a finished connection from its link group */ 234/* remove a finished connection from its link group */
232void smc_conn_free(struct smc_connection *conn) 235void smc_conn_free(struct smc_connection *conn)
233{ 236{
234 struct smc_link_group *lgr = conn->lgr; 237 if (!conn->lgr)
235
236 if (!lgr)
237 return; 238 return;
238 smc_cdc_tx_dismiss_slots(conn); 239 smc_cdc_tx_dismiss_slots(conn);
239 smc_lgr_unregister_conn(conn); 240 smc_lgr_unregister_conn(conn);
@@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
327 while (node) { 328 while (node) {
328 conn = rb_entry(node, struct smc_connection, alert_node); 329 conn = rb_entry(node, struct smc_connection, alert_node);
329 smc = container_of(conn, struct smc_sock, conn); 330 smc = container_of(conn, struct smc_sock, conn);
330 sock_hold(&smc->sk); 331 sock_hold(&smc->sk); /* sock_put in close work */
332 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
331 __smc_lgr_unregister_conn(conn); 333 __smc_lgr_unregister_conn(conn);
332 schedule_work(&conn->close_work); 334 write_unlock_bh(&lgr->conns_lock);
333 sock_put(&smc->sk); 335 if (!schedule_work(&conn->close_work))
336 sock_put(&smc->sk);
337 write_lock_bh(&lgr->conns_lock);
334 node = rb_first(&lgr->conns_all); 338 node = rb_first(&lgr->conns_all);
335 } 339 }
336 write_unlock_bh(&lgr->conns_lock); 340 write_unlock_bh(&lgr->conns_lock);
341 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
337} 342}
338 343
339/* Determine vlan of internal TCP socket. 344/* Determine vlan of internal TCP socket.
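
smc_lgr_terminate() above now drops conns_lock around the scheduling step and only keeps the extra socket reference when schedule_work() actually queued the close work: a false return means the work was already pending and its earlier scheduler already donated a reference, so the new hold must be dropped to keep the refcount balanced. A small user-space model of that rule, with all names hypothetical:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct sk { atomic_int refcnt; };

static void sock_hold(struct sk *s) { atomic_fetch_add(&s->refcnt, 1); }
static void sock_put(struct sk *s)  { atomic_fetch_sub(&s->refcnt, 1); }

static bool work_pending;
static bool schedule_close_work(void)
{
    if (work_pending)
        return false;   /* already queued: not scheduled again */
    work_pending = true;
    return true;
}

int main(void)
{
    struct sk s = { 1 };

    sock_hold(&s);              /* reference for the close work */
    if (!schedule_close_work())
        sock_put(&s);           /* kept: we queued it, work owns the ref */

    sock_hold(&s);              /* second attempt while still pending */
    if (!schedule_close_work())
        sock_put(&s);           /* dropped: balanced again */

    printf("refcnt=%d (one for the socket, one owned by the work)\n",
           atomic_load(&s.refcnt));
    return 0;
}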
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf70224..427b91c1c964 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
86 if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) 86 if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
87 goto errout; 87 goto errout;
88 88
89 if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { 89 if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
90 smc->conn.alert_token_local) {
90 struct smc_connection *conn = &smc->conn; 91 struct smc_connection *conn = &smc->conn;
91 struct smc_diag_conninfo cinfo = { 92 struct smc_diag_conninfo cinfo = {
92 .token = conn->alert_token_local, 93 .token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
124 goto errout; 125 goto errout;
125 } 126 }
126 127
127 if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { 128 if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
129 !list_empty(&smc->conn.lgr->list)) {
128 struct smc_diag_lgrinfo linfo = { 130 struct smc_diag_lgrinfo linfo = {
129 .role = smc->conn.lgr->role, 131 .role = smc->conn.lgr->role,
130 .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, 132 .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 90f1a7f9085c..2a8957bd6d38 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -141,6 +141,17 @@ out:
141 return rc; 141 return rc;
142} 142}
143 143
144static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
145{
146 struct smc_link_group *lgr, *l;
147
148 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
149 if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
150 lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
151 smc_lgr_terminate(lgr);
152 }
153}
154
144/* process context wrapper for might_sleep smc_ib_remember_port_attr */ 155/* process context wrapper for might_sleep smc_ib_remember_port_attr */
145static void smc_ib_port_event_work(struct work_struct *work) 156static void smc_ib_port_event_work(struct work_struct *work)
146{ 157{
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
151 for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { 162 for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
152 smc_ib_remember_port_attr(smcibdev, port_idx + 1); 163 smc_ib_remember_port_attr(smcibdev, port_idx + 1);
153 clear_bit(port_idx, &smcibdev->port_event_mask); 164 clear_bit(port_idx, &smcibdev->port_event_mask);
165 if (!smc_ib_port_active(smcibdev, port_idx + 1))
166 smc_ib_port_terminate(smcibdev, port_idx + 1);
154 } 167 }
155} 168}
156 169
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
165 178
166 switch (ibevent->event) { 179 switch (ibevent->event) {
167 case IB_EVENT_PORT_ERR: 180 case IB_EVENT_PORT_ERR:
168 port_idx = ibevent->element.port_num - 1;
169 set_bit(port_idx, &smcibdev->port_event_mask);
170 schedule_work(&smcibdev->port_event_work);
171 /* fall through */
172 case IB_EVENT_DEVICE_FATAL: 181 case IB_EVENT_DEVICE_FATAL:
173 /* tbd in follow-on patch:
174 * abnormal close of corresponding connections
175 */
176 break;
177 case IB_EVENT_PORT_ACTIVE: 182 case IB_EVENT_PORT_ACTIVE:
178 port_idx = ibevent->element.port_num - 1; 183 port_idx = ibevent->element.port_num - 1;
179 set_bit(port_idx, &smcibdev->port_event_mask); 184 set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
186 191
187void smc_ib_dealloc_protection_domain(struct smc_link *lnk) 192void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
188{ 193{
189 ib_dealloc_pd(lnk->roce_pd); 194 if (lnk->roce_pd)
195 ib_dealloc_pd(lnk->roce_pd);
190 lnk->roce_pd = NULL; 196 lnk->roce_pd = NULL;
191} 197}
192 198
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
203 209
204static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) 210static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
205{ 211{
212 struct smc_ib_device *smcibdev =
213 (struct smc_ib_device *)ibevent->device;
214 u8 port_idx;
215
206 switch (ibevent->event) { 216 switch (ibevent->event) {
207 case IB_EVENT_DEVICE_FATAL: 217 case IB_EVENT_DEVICE_FATAL:
208 case IB_EVENT_GID_CHANGE: 218 case IB_EVENT_GID_CHANGE:
209 case IB_EVENT_PORT_ERR: 219 case IB_EVENT_PORT_ERR:
210 case IB_EVENT_QP_ACCESS_ERR: 220 case IB_EVENT_QP_ACCESS_ERR:
211 /* tbd in follow-on patch: 221 port_idx = ibevent->element.port_num - 1;
212 * abnormal close of corresponding connections 222 set_bit(port_idx, &smcibdev->port_event_mask);
213 */ 223 schedule_work(&smcibdev->port_event_work);
214 break; 224 break;
215 default: 225 default:
216 break; 226 break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
219 229
220void smc_ib_destroy_queue_pair(struct smc_link *lnk) 230void smc_ib_destroy_queue_pair(struct smc_link *lnk)
221{ 231{
222 ib_destroy_qp(lnk->roce_qp); 232 if (lnk->roce_qp)
233 ib_destroy_qp(lnk->roce_qp);
223 lnk->roce_qp = NULL; 234 lnk->roce_qp = NULL;
224} 235}
225 236
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
462{ 473{
463 if (!smcibdev->initialized) 474 if (!smcibdev->initialized)
464 return; 475 return;
476 smcibdev->initialized = 0;
465 smc_wr_remove_dev(smcibdev); 477 smc_wr_remove_dev(smcibdev);
466 ib_unregister_event_handler(&smcibdev->event_handler); 478 ib_unregister_event_handler(&smcibdev->event_handler);
467 ib_destroy_cq(smcibdev->roce_cq_recv); 479 ib_destroy_cq(smcibdev->roce_cq_recv);
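
With the smc_ib.c changes, both the global event handler and the QP event handler funnel error events through the same deferred path: set the port's bit in port_event_mask and schedule port_event_work, which re-reads the port attributes in process context and terminates the link groups of ports that went inactive. The split exists because the IB event callbacks must not sleep. A hedged user-space model of that bitmask-plus-worker handoff follows; it clears the whole mask at once, which is a simplification, and every name is hypothetical.

#include <stdatomic.h>
#include <stdio.h>

#define MAX_PORTS 2

static atomic_ulong port_event_mask;
static int port_active[MAX_PORTS] = { 1, 1 };

static void event_handler(int port_idx)
{
    /* non-sleeping context: only record the port and kick the worker */
    atomic_fetch_or(&port_event_mask, 1UL << port_idx);
    /* real code: schedule_work(&smcibdev->port_event_work); */
}

static void port_event_work(void)
{
    unsigned long mask = atomic_exchange(&port_event_mask, 0);
    int i;

    for (i = 0; i < MAX_PORTS; i++) {
        if (!(mask & (1UL << i)))
            continue;
        /* real code re-reads port attributes here, then terminates the
         * link groups of any port that is no longer active */
        if (!port_active[i])
            printf("terminating link groups on port %d\n", i + 1);
    }
}

int main(void)
{
    port_active[0] = 0; /* simulate IB_EVENT_PORT_ERR on port 1 */
    event_handler(0);
    port_event_work();  /* normally run by the workqueue */
    return 0;
}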
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index cbf58637ee14..eff4e0d0bb31 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -35,8 +35,8 @@ static void smc_rx_data_ready(struct sock *sk)
35 rcu_read_lock(); 35 rcu_read_lock();
36 wq = rcu_dereference(sk->sk_wq); 36 wq = rcu_dereference(sk->sk_wq);
37 if (skwq_has_sleeper(wq)) 37 if (skwq_has_sleeper(wq))
38 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | 38 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
39 POLLRDNORM | POLLRDBAND); 39 EPOLLRDNORM | EPOLLRDBAND);
40 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 40 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
41 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 41 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
42 (sk->sk_state == SMC_CLOSED)) 42 (sk->sk_state == SMC_CLOSED))
@@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
65 rc = sk_wait_event(sk, timeo, 65 rc = sk_wait_event(sk, timeo,
66 sk->sk_err || 66 sk->sk_err ||
67 sk->sk_shutdown & RCV_SHUTDOWN || 67 sk->sk_shutdown & RCV_SHUTDOWN ||
68 sock_flag(sk, SOCK_DONE) ||
69 atomic_read(&conn->bytes_to_rcv) || 68 atomic_read(&conn->bytes_to_rcv) ||
70 smc_cdc_rxed_any_close_or_senddone(conn), 69 smc_cdc_rxed_any_close_or_senddone(conn),
71 &wait); 70 &wait);
@@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
116 if (read_done) { 115 if (read_done) {
117 if (sk->sk_err || 116 if (sk->sk_err ||
118 sk->sk_state == SMC_CLOSED || 117 sk->sk_state == SMC_CLOSED ||
119 (sk->sk_shutdown & RCV_SHUTDOWN) || 118 sk->sk_shutdown & RCV_SHUTDOWN ||
120 !timeo || 119 !timeo ||
121 signal_pending(current) || 120 signal_pending(current) ||
122 smc_cdc_rxed_any_close_or_senddone(conn) || 121 smc_cdc_rxed_any_close_or_senddone(conn) ||
@@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
124 peer_conn_abort) 123 peer_conn_abort)
125 break; 124 break;
126 } else { 125 } else {
127 if (sock_flag(sk, SOCK_DONE))
128 break;
129 if (sk->sk_err) { 126 if (sk->sk_err) {
130 read_done = sock_error(sk); 127 read_done = sock_error(sk);
131 break; 128 break;
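
The smc_rx.c hunks drop the SOCK_DONE tests and let the CDC-derived predicates decide when a blocked reader wakes, while the wakeup masks move to the typed EPOLL* constants. Stripped of kernel detail, smc_rx_wait_data() is a predicate-driven sleep: block until one condition in a fixed set holds. A rough user-space analogue using a condition variable instead of sk_wait_event(), with all names hypothetical:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct rxq {
    pthread_mutex_t lock;
    pthread_cond_t wake;
    int bytes_to_rcv;
    bool peer_closed;   /* smc_cdc_rxed_any_close_or_senddone() counterpart */
    bool err;           /* sk->sk_err counterpart */
};

static bool rx_ready(struct rxq *q)
{
    return q->err || q->bytes_to_rcv || q->peer_closed;
}

static void rx_wait_data(struct rxq *q)
{
    pthread_mutex_lock(&q->lock);
    while (!rx_ready(q))
        pthread_cond_wait(&q->wake, &q->lock);
    pthread_mutex_unlock(&q->lock);
}

static void *producer(void *arg)
{
    struct rxq *q = arg;

    pthread_mutex_lock(&q->lock);
    q->bytes_to_rcv = 64;           /* data arrived */
    pthread_cond_signal(&q->wake);  /* sk_data_ready() counterpart */
    pthread_mutex_unlock(&q->lock);
    return NULL;
}

int main(void)
{
    struct rxq q = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
                     0, false, false };
    pthread_t t;

    pthread_create(&t, NULL, producer, &q);
    rx_wait_data(&q);
    printf("readable: %d bytes\n", q.bytes_to_rcv);
    pthread_join(t, NULL);
    return 0;
}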
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c48dc2d5fd3a..72f004c9c9b1 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -46,8 +46,8 @@ static void smc_tx_write_space(struct sock *sk)
46 wq = rcu_dereference(sk->sk_wq); 46 wq = rcu_dereference(sk->sk_wq);
47 if (skwq_has_sleeper(wq)) 47 if (skwq_has_sleeper(wq))
48 wake_up_interruptible_poll(&wq->wait, 48 wake_up_interruptible_poll(&wq->wait,
49 POLLOUT | POLLWRNORM | 49 EPOLLOUT | EPOLLWRNORM |
50 POLLWRBAND); 50 EPOLLWRBAND);
51 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 51 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
52 sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); 52 sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
53 rcu_read_unlock(); 53 rcu_read_unlock();
@@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
86 rc = -EPIPE; 86 rc = -EPIPE;
87 break; 87 break;
88 } 88 }
89 if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { 89 if (smc_cdc_rxed_any_close(conn)) {
90 rc = -ECONNRESET; 90 rc = -ECONNRESET;
91 break; 91 break;
92 } 92 }
@@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
104 if (atomic_read(&conn->sndbuf_space)) 104 if (atomic_read(&conn->sndbuf_space))
105 break; /* at least 1 byte of free space available */ 105 break; /* at least 1 byte of free space available */
106 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 106 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
107 sk->sk_write_pending++;
108 sk_wait_event(sk, &timeo, 107 sk_wait_event(sk, &timeo,
109 sk->sk_err || 108 sk->sk_err ||
110 (sk->sk_shutdown & SEND_SHUTDOWN) || 109 (sk->sk_shutdown & SEND_SHUTDOWN) ||
111 smc_cdc_rxed_any_close_or_senddone(conn) || 110 smc_cdc_rxed_any_close(conn) ||
112 atomic_read(&conn->sndbuf_space), 111 atomic_read(&conn->sndbuf_space),
113 &wait); 112 &wait);
114 sk->sk_write_pending--;
115 } 113 }
116 remove_wait_queue(sk_sleep(sk), &wait); 114 remove_wait_queue(sk_sleep(sk), &wait);
117 return rc; 115 return rc;
@@ -250,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
250 peer_rmbe_offset; 248 peer_rmbe_offset;
251 rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; 249 rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
252 rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); 250 rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
253 if (rc) 251 if (rc) {
254 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 252 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
253 smc_lgr_terminate(lgr);
254 }
255 return rc; 255 return rc;
256} 256}
257 257
@@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
408 goto out_unlock; 408 goto out_unlock;
409 } 409 }
410 rc = 0; 410 rc = 0;
411 schedule_delayed_work(&conn->tx_work, 411 if (conn->alert_token_local) /* connection healthy */
412 SMC_TX_WORK_DELAY); 412 schedule_delayed_work(&conn->tx_work,
413 SMC_TX_WORK_DELAY);
413 } 414 }
414 goto out_unlock; 415 goto out_unlock;
415 } 416 }
@@ -440,19 +441,24 @@ static void smc_tx_work(struct work_struct *work)
440 int rc; 441 int rc;
441 442
442 lock_sock(&smc->sk); 443 lock_sock(&smc->sk);
444 if (smc->sk.sk_err ||
445 !conn->alert_token_local ||
446 conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
447 goto out;
448
443 rc = smc_tx_sndbuf_nonempty(conn); 449 rc = smc_tx_sndbuf_nonempty(conn);
444 if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && 450 if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
445 !atomic_read(&conn->bytes_to_rcv)) 451 !atomic_read(&conn->bytes_to_rcv))
446 conn->local_rx_ctrl.prod_flags.write_blocked = 0; 452 conn->local_rx_ctrl.prod_flags.write_blocked = 0;
453
454out:
447 release_sock(&smc->sk); 455 release_sock(&smc->sk);
448} 456}
449 457
450void smc_tx_consumer_update(struct smc_connection *conn) 458void smc_tx_consumer_update(struct smc_connection *conn)
451{ 459{
452 union smc_host_cursor cfed, cons; 460 union smc_host_cursor cfed, cons;
453 struct smc_cdc_tx_pend *pend; 461 int to_confirm;
454 struct smc_wr_buf *wr_buf;
455 int to_confirm, rc;
456 462
457 smc_curs_write(&cons, 463 smc_curs_write(&cons,
458 smc_curs_read(&conn->local_tx_ctrl.cons, conn), 464 smc_curs_read(&conn->local_tx_ctrl.cons, conn),
@@ -466,10 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
466 ((to_confirm > conn->rmbe_update_limit) && 472 ((to_confirm > conn->rmbe_update_limit) &&
467 ((to_confirm > (conn->rmbe_size / 2)) || 473 ((to_confirm > (conn->rmbe_size / 2)) ||
468 conn->local_rx_ctrl.prod_flags.write_blocked))) { 474 conn->local_rx_ctrl.prod_flags.write_blocked))) {
469 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); 475 if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
470 if (!rc) 476 conn->alert_token_local) { /* connection healthy */
471 rc = smc_cdc_msg_send(conn, wr_buf, pend);
472 if (rc < 0) {
473 schedule_delayed_work(&conn->tx_work, 477 schedule_delayed_work(&conn->tx_work,
474 SMC_TX_WORK_DELAY); 478 SMC_TX_WORK_DELAY);
475 return; 479 return;
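
The smc_tx.c fixes share one idea: never re-arm tx_work for a dead connection. Both smc_tx_sndbuf_nonempty() and smc_tx_consumer_update() now test conn->alert_token_local, which is cleared when the connection unregisters from its link group, before scheduling delayed work, and smc_tx_work() bails out early under the same conditions. A compact sketch of the guard, with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

struct conn { unsigned int alert_token_local; };

static bool schedule_tx_retry(struct conn *c)
{
    if (!c->alert_token_local)  /* connection already torn down */
        return false;
    /* real code: schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY) */
    return true;
}

int main(void)
{
    struct conn healthy = { .alert_token_local = 0xabcd };
    struct conn dead = { .alert_token_local = 0 };

    printf("healthy: retry=%d\n", schedule_tx_retry(&healthy));
    printf("dead:    retry=%d\n", schedule_tx_retry(&dead));
    return 0;
}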
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de4537f66832..1b8af23e6e2b 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data)
122again: 122again:
123 polled++; 123 polled++;
124 do { 124 do {
125 memset(&wc, 0, sizeof(wc));
125 rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); 126 rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
126 if (polled == 1) { 127 if (polled == 1) {
127 ib_req_notify_cq(dev->roce_cq_send, 128 ib_req_notify_cq(dev->roce_cq_send,
@@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
173 struct smc_wr_tx_pend_priv **wr_pend_priv) 174 struct smc_wr_tx_pend_priv **wr_pend_priv)
174{ 175{
175 struct smc_wr_tx_pend *wr_pend; 176 struct smc_wr_tx_pend *wr_pend;
177 u32 idx = link->wr_tx_cnt;
176 struct ib_send_wr *wr_ib; 178 struct ib_send_wr *wr_ib;
177 u64 wr_id; 179 u64 wr_id;
178 u32 idx;
179 int rc; 180 int rc;
180 181
181 *wr_buf = NULL; 182 *wr_buf = NULL;
@@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
185 if (rc) 186 if (rc)
186 return rc; 187 return rc;
187 } else { 188 } else {
188 rc = wait_event_interruptible_timeout( 189 struct smc_link_group *lgr;
190
191 lgr = container_of(link, struct smc_link_group,
192 lnk[SMC_SINGLE_LINK]);
193 rc = wait_event_timeout(
189 link->wr_tx_wait, 194 link->wr_tx_wait,
195 list_empty(&lgr->list) || /* lgr terminated */
190 (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), 196 (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
191 SMC_WR_TX_WAIT_FREE_SLOT_TIME); 197 SMC_WR_TX_WAIT_FREE_SLOT_TIME);
192 if (!rc) { 198 if (!rc) {
193 /* timeout - terminate connections */ 199 /* timeout - terminate connections */
194 struct smc_link_group *lgr;
195
196 lgr = container_of(link, struct smc_link_group,
197 lnk[SMC_SINGLE_LINK]);
198 smc_lgr_terminate(lgr); 200 smc_lgr_terminate(lgr);
199 return -EPIPE; 201 return -EPIPE;
200 } 202 }
201 if (rc == -ERESTARTSYS)
202 return -EINTR;
203 if (idx == link->wr_tx_cnt) 203 if (idx == link->wr_tx_cnt)
204 return -EPIPE; 204 return -EPIPE;
205 } 205 }
@@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
249 pend = container_of(priv, struct smc_wr_tx_pend, priv); 249 pend = container_of(priv, struct smc_wr_tx_pend, priv);
250 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], 250 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
251 &failed_wr); 251 &failed_wr);
252 if (rc) 252 if (rc) {
253 struct smc_link_group *lgr =
254 container_of(link, struct smc_link_group,
255 lnk[SMC_SINGLE_LINK]);
256
253 smc_wr_tx_put_slot(link, priv); 257 smc_wr_tx_put_slot(link, priv);
258 smc_lgr_terminate(lgr);
259 }
254 return rc; 260 return rc;
255} 261}
256 262
@@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
300 return rc; 306 return rc;
301} 307}
302 308
303void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, 309void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
304 smc_wr_tx_filter filter, 310 smc_wr_tx_filter filter,
305 smc_wr_tx_dismisser dismisser, 311 smc_wr_tx_dismisser dismisser,
306 unsigned long data) 312 unsigned long data)
307{ 313{
308 struct smc_wr_tx_pend_priv *tx_pend; 314 struct smc_wr_tx_pend_priv *tx_pend;
309 struct smc_wr_rx_hdr *wr_rx; 315 struct smc_wr_rx_hdr *wr_tx;
310 int i; 316 int i;
311 317
312 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { 318 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
313 wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; 319 wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
314 if (wr_rx->type != wr_rx_hdr_type) 320 if (wr_tx->type != wr_tx_hdr_type)
315 continue; 321 continue;
316 tx_pend = &link->wr_tx_pends[i].priv; 322 tx_pend = &link->wr_tx_pends[i].priv;
317 if (filter(tx_pend, data)) 323 if (filter(tx_pend, data))
@@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
319 } 325 }
320} 326}
321 327
322bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
323 smc_wr_tx_filter filter, unsigned long data)
324{
325 struct smc_wr_tx_pend_priv *tx_pend;
326 struct smc_wr_rx_hdr *wr_rx;
327 int i;
328
329 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
330 wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
331 if (wr_rx->type != wr_rx_hdr_type)
332 continue;
333 tx_pend = &link->wr_tx_pends[i].priv;
334 if (filter(tx_pend, data))
335 return true;
336 }
337 return false;
338}
339
340/****************************** receive queue ********************************/ 328/****************************** receive queue ********************************/
341 329
342int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) 330int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
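
smc_wr_tx_get_free_slot() above switches from an interruptible wait to wait_event_timeout() and adds "link group terminated" to the wake condition, so a waiter is released as soon as the lgr leaves the global list instead of sitting out the full timeout; a failed ib_post_send() in smc_wr_tx_send() now likewise terminates the whole link group. The waiting shape, modeled in user space with pthread_cond_timedwait() and entirely hypothetical names:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct link {
    pthread_mutex_t lock;
    pthread_cond_t wake;
    bool lgr_terminated;    /* list_empty(&lgr->list) counterpart */
    int free_slots;
};

/* returns 0 on success, -1 when the lgr is gone or the wait timed out */
static int wait_free_slot(struct link *l, int timeout_sec)
{
    struct timespec dl;
    int rc = 0;

    clock_gettime(CLOCK_REALTIME, &dl);
    dl.tv_sec += timeout_sec;

    pthread_mutex_lock(&l->lock);
    while (!l->lgr_terminated && !l->free_slots && rc == 0)
        rc = pthread_cond_timedwait(&l->wake, &l->lock, &dl);
    if (l->lgr_terminated || rc != 0) {
        pthread_mutex_unlock(&l->lock);
        return -1;  /* real code: smc_lgr_terminate(lgr); return -EPIPE */
    }
    l->free_slots--;
    pthread_mutex_unlock(&l->lock);
    return 0;
}

int main(void)
{
    struct link l = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
                      false, 1 };

    printf("first slot: %d\n", wait_free_slot(&l, 1));
    printf("second (times out): %d\n", wait_free_slot(&l, 1));
    return 0;
}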
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2acf12b06063..ef0c3494c9cb 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link,
93int smc_wr_tx_send(struct smc_link *link, 93int smc_wr_tx_send(struct smc_link *link,
94 struct smc_wr_tx_pend_priv *wr_pend_priv); 94 struct smc_wr_tx_pend_priv *wr_pend_priv);
95void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); 95void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
96bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
97 smc_wr_tx_filter filter, unsigned long data);
98void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, 96void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
99 smc_wr_tx_filter filter, 97 smc_wr_tx_filter filter,
100 smc_wr_tx_dismisser dismisser, 98 smc_wr_tx_dismisser dismisser,
diff --git a/net/socket.c b/net/socket.c
index 6f05d5c4bf30..a93c99b518ca 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -118,7 +118,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
118static int sock_mmap(struct file *file, struct vm_area_struct *vma); 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
119 119
120static int sock_close(struct inode *inode, struct file *file); 120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file, 121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait); 122 struct poll_table_struct *wait);
123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
124#ifdef CONFIG_COMPAT 124#ifdef CONFIG_COMPAT
@@ -163,12 +163,6 @@ static DEFINE_SPINLOCK(net_family_lock);
163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
164 164
165/* 165/*
166 * Statistics counters of the socket lists
167 */
168
169static DEFINE_PER_CPU(int, sockets_in_use);
170
171/*
172 * Support routines. 166 * Support routines.
173 * Move socket addresses back and forth across the kernel/user 167 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits. 168 * divide and look after the messy bits.
@@ -580,7 +574,6 @@ struct socket *sock_alloc(void)
580 inode->i_gid = current_fsgid(); 574 inode->i_gid = current_fsgid();
581 inode->i_op = &sockfs_inode_ops; 575 inode->i_op = &sockfs_inode_ops;
582 576
583 this_cpu_add(sockets_in_use, 1);
584 return sock; 577 return sock;
585} 578}
586EXPORT_SYMBOL(sock_alloc); 579EXPORT_SYMBOL(sock_alloc);
@@ -607,7 +600,6 @@ void sock_release(struct socket *sock)
607 if (rcu_dereference_protected(sock->wq, 1)->fasync_list) 600 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
608 pr_err("%s: fasync list not empty!\n", __func__); 601 pr_err("%s: fasync list not empty!\n", __func__);
609 602
610 this_cpu_sub(sockets_in_use, 1);
611 if (!sock->file) { 603 if (!sock->file) {
612 iput(SOCK_INODE(sock)); 604 iput(SOCK_INODE(sock));
613 return; 605 return;
@@ -969,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
969 * If this ioctl is unknown try to hand it down 961 * If this ioctl is unknown try to hand it down
970 * to the NIC driver. 962 * to the NIC driver.
971 */ 963 */
972 if (err == -ENOIOCTLCMD) 964 if (err != -ENOIOCTLCMD)
973 err = dev_ioctl(net, cmd, argp); 965 return err;
974 966
967 if (cmd == SIOCGIFCONF) {
968 struct ifconf ifc;
969 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
970 return -EFAULT;
971 rtnl_lock();
972 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
973 rtnl_unlock();
974 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
975 err = -EFAULT;
976 } else {
977 struct ifreq ifr;
978 bool need_copyout;
979 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
980 return -EFAULT;
981 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
982 if (!err && need_copyout)
983 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
984 return -EFAULT;
985 }
975 return err; 986 return err;
976} 987}
977 988
@@ -996,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
996 sock = file->private_data; 1007 sock = file->private_data;
997 sk = sock->sk; 1008 sk = sock->sk;
998 net = sock_net(sk); 1009 net = sock_net(sk);
999 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 1010 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1000 err = dev_ioctl(net, cmd, argp); 1011 struct ifreq ifr;
1012 bool need_copyout;
1013 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1014 return -EFAULT;
1015 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1016 if (!err && need_copyout)
1017 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1018 return -EFAULT;
1001 } else 1019 } else
1002#ifdef CONFIG_WEXT_CORE 1020#ifdef CONFIG_WEXT_CORE
1003 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 1021 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
1004 err = dev_ioctl(net, cmd, argp); 1022 err = wext_handle_ioctl(net, cmd, argp);
1005 } else 1023 } else
1006#endif 1024#endif
1007 switch (cmd) { 1025 switch (cmd) {
@@ -1097,9 +1115,9 @@ out_release:
1097EXPORT_SYMBOL(sock_create_lite); 1115EXPORT_SYMBOL(sock_create_lite);
1098 1116
1099/* No kernel lock held - perfect */ 1117/* No kernel lock held - perfect */
1100static unsigned int sock_poll(struct file *file, poll_table *wait) 1118static __poll_t sock_poll(struct file *file, poll_table *wait)
1101{ 1119{
1102 unsigned int busy_flag = 0; 1120 __poll_t busy_flag = 0;
1103 struct socket *sock; 1121 struct socket *sock;
1104 1122
1105 /* 1123 /*
@@ -2621,29 +2639,11 @@ out_fs:
2621 2639
2622core_initcall(sock_init); /* early initcall */ 2640core_initcall(sock_init); /* early initcall */
2623 2641
2624static int __init jit_init(void)
2625{
2626#ifdef CONFIG_BPF_JIT_ALWAYS_ON
2627 bpf_jit_enable = 1;
2628#endif
2629 return 0;
2630}
2631pure_initcall(jit_init);
2632
2633#ifdef CONFIG_PROC_FS 2642#ifdef CONFIG_PROC_FS
2634void socket_seq_show(struct seq_file *seq) 2643void socket_seq_show(struct seq_file *seq)
2635{ 2644{
2636 int cpu; 2645 seq_printf(seq, "sockets: used %d\n",
2637 int counter = 0; 2646 sock_inuse_get(seq->private));
2638
2639 for_each_possible_cpu(cpu)
2640 counter += per_cpu(sockets_in_use, cpu);
2641
2642 /* It can be negative, by the way. 8) */
2643 if (counter < 0)
2644 counter = 0;
2645
2646 seq_printf(seq, "sockets: used %d\n", counter);
2647} 2647}
2648#endif /* CONFIG_PROC_FS */ 2648#endif /* CONFIG_PROC_FS */
2649 2649
@@ -2680,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
2680 return err; 2680 return err;
2681} 2681}
2682 2682
2683static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) 2683static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2684{
2685 struct ifreq __user *uifr;
2686 int err;
2687
2688 uifr = compat_alloc_user_space(sizeof(struct ifreq));
2689 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2690 return -EFAULT;
2691
2692 err = dev_ioctl(net, SIOCGIFNAME, uifr);
2693 if (err)
2694 return err;
2695
2696 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
2697 return -EFAULT;
2698
2699 return 0;
2700}
2701
2702static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2703{ 2684{
2704 struct compat_ifconf ifc32; 2685 struct compat_ifconf ifc32;
2705 struct ifconf ifc; 2686 struct ifconf ifc;
2706 struct ifconf __user *uifc;
2707 struct compat_ifreq __user *ifr32;
2708 struct ifreq __user *ifr;
2709 unsigned int i, j;
2710 int err; 2687 int err;
2711 2688
2712 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) 2689 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
2713 return -EFAULT; 2690 return -EFAULT;
2714 2691
2715 memset(&ifc, 0, sizeof(ifc)); 2692 ifc.ifc_len = ifc32.ifc_len;
2716 if (ifc32.ifcbuf == 0) { 2693 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
2717 ifc32.ifc_len = 0;
2718 ifc.ifc_len = 0;
2719 ifc.ifc_req = NULL;
2720 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2721 } else {
2722 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2723 sizeof(struct ifreq);
2724 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2725 ifc.ifc_len = len;
2726 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2727 ifr32 = compat_ptr(ifc32.ifcbuf);
2728 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
2729 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
2730 return -EFAULT;
2731 ifr++;
2732 ifr32++;
2733 }
2734 }
2735 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2736 return -EFAULT;
2737 2694
2738 err = dev_ioctl(net, SIOCGIFCONF, uifc); 2695 rtnl_lock();
2696 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2697 rtnl_unlock();
2739 if (err) 2698 if (err)
2740 return err; 2699 return err;
2741 2700
2742 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) 2701 ifc32.ifc_len = ifc.ifc_len;
2743 return -EFAULT;
2744
2745 ifr = ifc.ifc_req;
2746 ifr32 = compat_ptr(ifc32.ifcbuf);
2747 for (i = 0, j = 0;
2748 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2749 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2750 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
2751 return -EFAULT;
2752 ifr32++;
2753 ifr++;
2754 }
2755
2756 if (ifc32.ifcbuf == 0) {
2757 /* Translate from 64-bit structure multiple to
2758 * a 32-bit one.
2759 */
2760 i = ifc.ifc_len;
2761 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
2762 ifc32.ifc_len = i;
2763 } else {
2764 ifc32.ifc_len = i;
2765 }
2766 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) 2702 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
2767 return -EFAULT; 2703 return -EFAULT;
2768 2704
@@ -2773,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2773{ 2709{
2774 struct compat_ethtool_rxnfc __user *compat_rxnfc; 2710 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2775 bool convert_in = false, convert_out = false; 2711 bool convert_in = false, convert_out = false;
2776 size_t buf_size = ALIGN(sizeof(struct ifreq), 8); 2712 size_t buf_size = 0;
2777 struct ethtool_rxnfc __user *rxnfc; 2713 struct ethtool_rxnfc __user *rxnfc = NULL;
2778 struct ifreq __user *ifr; 2714 struct ifreq ifr;
2779 u32 rule_cnt = 0, actual_rule_cnt; 2715 u32 rule_cnt = 0, actual_rule_cnt;
2780 u32 ethcmd; 2716 u32 ethcmd;
2781 u32 data; 2717 u32 data;
@@ -2812,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2812 case ETHTOOL_SRXCLSRLDEL: 2748 case ETHTOOL_SRXCLSRLDEL:
2813 buf_size += sizeof(struct ethtool_rxnfc); 2749 buf_size += sizeof(struct ethtool_rxnfc);
2814 convert_in = true; 2750 convert_in = true;
2751 rxnfc = compat_alloc_user_space(buf_size);
2815 break; 2752 break;
2816 } 2753 }
2817 2754
2818 ifr = compat_alloc_user_space(buf_size); 2755 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2819 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
2820
2821 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2822 return -EFAULT; 2756 return -EFAULT;
2823 2757
2824 if (put_user(convert_in ? rxnfc : compat_ptr(data), 2758 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
2825 &ifr->ifr_ifru.ifru_data))
2826 return -EFAULT;
2827 2759
2828 if (convert_in) { 2760 if (convert_in) {
2829 /* We expect there to be holes between fs.m_ext and 2761 /* We expect there to be holes between fs.m_ext and
@@ -2851,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2851 return -EFAULT; 2783 return -EFAULT;
2852 } 2784 }
2853 2785
2854 ret = dev_ioctl(net, SIOCETHTOOL, ifr); 2786 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
2855 if (ret) 2787 if (ret)
2856 return ret; 2788 return ret;
2857 2789
@@ -2892,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2892 2824
2893static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 2825static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2894{ 2826{
2895 void __user *uptr;
2896 compat_uptr_t uptr32; 2827 compat_uptr_t uptr32;
2897 struct ifreq __user *uifr; 2828 struct ifreq ifr;
2829 void __user *saved;
2830 int err;
2898 2831
2899 uifr = compat_alloc_user_space(sizeof(*uifr)); 2832 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
2900 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2901 return -EFAULT; 2833 return -EFAULT;
2902 2834
2903 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) 2835 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2904 return -EFAULT; 2836 return -EFAULT;
2905 2837
2906 uptr = compat_ptr(uptr32); 2838 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
2907 2839 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
2908 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2909 return -EFAULT;
2910
2911 return dev_ioctl(net, SIOCWANDEV, uifr);
2912}
2913
2914static int bond_ioctl(struct net *net, unsigned int cmd,
2915 struct compat_ifreq __user *ifr32)
2916{
2917 struct ifreq kifr;
2918 mm_segment_t old_fs;
2919 int err;
2920 2840
2921 switch (cmd) { 2841 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
2922 case SIOCBONDENSLAVE: 2842 if (!err) {
2923 case SIOCBONDRELEASE: 2843 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
2924 case SIOCBONDSETHWADDR: 2844 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
2925 case SIOCBONDCHANGEACTIVE: 2845 err = -EFAULT;
2926 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
2927 return -EFAULT;
2928
2929 old_fs = get_fs();
2930 set_fs(KERNEL_DS);
2931 err = dev_ioctl(net, cmd,
2932 (struct ifreq __user __force *) &kifr);
2933 set_fs(old_fs);
2934
2935 return err;
2936 default:
2937 return -ENOIOCTLCMD;
2938 } 2846 }
2847 return err;
2939} 2848}
2940 2849
2941/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ 2850/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2942static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, 2851static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
2943 struct compat_ifreq __user *u_ifreq32) 2852 struct compat_ifreq __user *u_ifreq32)
2944{ 2853{
2945 struct ifreq __user *u_ifreq64; 2854 struct ifreq ifreq;
2946 char tmp_buf[IFNAMSIZ];
2947 void __user *data64;
2948 u32 data32; 2855 u32 data32;
2949 2856
2950 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), 2857 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
2951 IFNAMSIZ))
2952 return -EFAULT; 2858 return -EFAULT;
2953 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) 2859 if (get_user(data32, &u_ifreq32->ifr_data))
2954 return -EFAULT; 2860 return -EFAULT;
2955 data64 = compat_ptr(data32); 2861 ifreq.ifr_data = compat_ptr(data32);
2956 2862
2957 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); 2863 return dev_ioctl(net, cmd, &ifreq, NULL);
2958
2959 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2960 IFNAMSIZ))
2961 return -EFAULT;
2962 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2963 return -EFAULT;
2964
2965 return dev_ioctl(net, cmd, u_ifreq64);
2966}
2967
2968static int dev_ifsioc(struct net *net, struct socket *sock,
2969 unsigned int cmd, struct compat_ifreq __user *uifr32)
2970{
2971 struct ifreq __user *uifr;
2972 int err;
2973
2974 uifr = compat_alloc_user_space(sizeof(*uifr));
2975 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2976 return -EFAULT;
2977
2978 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2979
2980 if (!err) {
2981 switch (cmd) {
2982 case SIOCGIFFLAGS:
2983 case SIOCGIFMETRIC:
2984 case SIOCGIFMTU:
2985 case SIOCGIFMEM:
2986 case SIOCGIFHWADDR:
2987 case SIOCGIFINDEX:
2988 case SIOCGIFADDR:
2989 case SIOCGIFBRDADDR:
2990 case SIOCGIFDSTADDR:
2991 case SIOCGIFNETMASK:
2992 case SIOCGIFPFLAGS:
2993 case SIOCGIFTXQLEN:
2994 case SIOCGMIIPHY:
2995 case SIOCGMIIREG:
2996 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
2997 err = -EFAULT;
2998 break;
2999 }
3000 }
3001 return err;
3002} 2864}
3003 2865
3004static int compat_sioc_ifmap(struct net *net, unsigned int cmd, 2866static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
@@ -3006,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3006{ 2868{
3007 struct ifreq ifr; 2869 struct ifreq ifr;
3008 struct compat_ifmap __user *uifmap32; 2870 struct compat_ifmap __user *uifmap32;
3009 mm_segment_t old_fs;
3010 int err; 2871 int err;
3011 2872
3012 uifmap32 = &uifr32->ifr_ifru.ifru_map; 2873 uifmap32 = &uifr32->ifr_ifru.ifru_map;
@@ -3020,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3020 if (err) 2881 if (err)
3021 return -EFAULT; 2882 return -EFAULT;
3022 2883
3023 old_fs = get_fs(); 2884 err = dev_ioctl(net, cmd, &ifr, NULL);
3024 set_fs(KERNEL_DS);
3025 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
3026 set_fs(old_fs);
3027 2885
3028 if (cmd == SIOCGIFMAP && !err) { 2886 if (cmd == SIOCGIFMAP && !err) {
3029 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 2887 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -3156,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3156 case SIOCSIFBR: 3014 case SIOCSIFBR:
3157 case SIOCGIFBR: 3015 case SIOCGIFBR:
3158 return old_bridge_ioctl(argp); 3016 return old_bridge_ioctl(argp);
3159 case SIOCGIFNAME:
3160 return dev_ifname32(net, argp);
3161 case SIOCGIFCONF: 3017 case SIOCGIFCONF:
3162 return dev_ifconf(net, argp); 3018 return compat_dev_ifconf(net, argp);
3163 case SIOCETHTOOL: 3019 case SIOCETHTOOL:
3164 return ethtool_ioctl(net, argp); 3020 return ethtool_ioctl(net, argp);
3165 case SIOCWANDEV: 3021 case SIOCWANDEV:
@@ -3167,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3167 case SIOCGIFMAP: 3023 case SIOCGIFMAP:
3168 case SIOCSIFMAP: 3024 case SIOCSIFMAP:
3169 return compat_sioc_ifmap(net, cmd, argp); 3025 return compat_sioc_ifmap(net, cmd, argp);
3170 case SIOCBONDENSLAVE:
3171 case SIOCBONDRELEASE:
3172 case SIOCBONDSETHWADDR:
3173 case SIOCBONDCHANGEACTIVE:
3174 return bond_ioctl(net, cmd, argp);
3175 case SIOCADDRT: 3026 case SIOCADDRT:
3176 case SIOCDELRT: 3027 case SIOCDELRT:
3177 return routing_ioctl(net, sock, cmd, argp); 3028 return routing_ioctl(net, sock, cmd, argp);
@@ -3231,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3231 case SIOCGMIIPHY: 3082 case SIOCGMIIPHY:
3232 case SIOCGMIIREG: 3083 case SIOCGMIIREG:
3233 case SIOCSMIIREG: 3084 case SIOCSMIIREG:
3234 return dev_ifsioc(net, sock, cmd, argp);
3235
3236 case SIOCSARP: 3085 case SIOCSARP:
3237 case SIOCGARP: 3086 case SIOCGARP:
3238 case SIOCDARP: 3087 case SIOCDARP:
3239 case SIOCATMARK: 3088 case SIOCATMARK:
3089 case SIOCBONDENSLAVE:
3090 case SIOCBONDRELEASE:
3091 case SIOCBONDSETHWADDR:
3092 case SIOCBONDCHANGEACTIVE:
3093 case SIOCGIFNAME:
3240 return sock_do_ioctl(net, sock, cmd, arg); 3094 return sock_do_ioctl(net, sock, cmd, arg);
3241 } 3095 }
3242 3096
@@ -3391,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3391} 3245}
3392EXPORT_SYMBOL(kernel_sendpage_locked); 3246EXPORT_SYMBOL(kernel_sendpage_locked);
3393 3247
3394int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3395{
3396 mm_segment_t oldfs = get_fs();
3397 int err;
3398
3399 set_fs(KERNEL_DS);
3400 err = sock->ops->ioctl(sock, cmd, arg);
3401 set_fs(oldfs);
3402
3403 return err;
3404}
3405EXPORT_SYMBOL(kernel_sock_ioctl);
3406
3407int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3248int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3408{ 3249{
3409 return sock->ops->shutdown(sock, how); 3250 return sock->ops->shutdown(sock, how);
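
The net/socket.c rework removes the compat_alloc_user_space()/set_fs(KERNEL_DS) dance: ioctl handlers now receive a kernel-resident struct ifreq, with copy_from_user()/copy_to_user() done once at the boundary, and dev_ioctl() reports via need_copyout whether results must be copied back. A minimal user-space imitation of that boundary discipline follows; memcpy stands in for the user-copy helpers and every name is hypothetical.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16

struct ifreq_k {    /* hypothetical stand-in for struct ifreq */
    char name[IFNAMSIZ];
    int mtu;
};

/* handler works on a kernel copy; says whether the caller must copy back */
static int dev_ioctl_k(unsigned int cmd, struct ifreq_k *ifr,
                       bool *need_copyout)
{
    (void)cmd;
    ifr->mtu = 1500;    /* pretend this was SIOCGIFMTU */
    *need_copyout = true;
    return 0;
}

static int sock_do_ioctl_k(unsigned int cmd, void *user_arg)
{
    struct ifreq_k ifr;
    bool need_copyout;
    int err;

    memcpy(&ifr, user_arg, sizeof(ifr));        /* copy_from_user() */
    err = dev_ioctl_k(cmd, &ifr, &need_copyout);
    if (!err && need_copyout)
        memcpy(user_arg, &ifr, sizeof(ifr));    /* copy_to_user() */
    return err;
}

int main(void)
{
    struct ifreq_k u = { "eth0", 0 };

    sock_do_ioctl_k(0 /* SIOCGIFMTU-ish */, &u);
    printf("%s mtu=%d\n", u.name, u.mtu);
    return 0;
}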
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e68943895be4..8a7e1c774f9c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -930,17 +930,17 @@ out:
930 930
931static DECLARE_WAIT_QUEUE_HEAD(queue_wait); 931static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
932 932
933static unsigned int cache_poll(struct file *filp, poll_table *wait, 933static __poll_t cache_poll(struct file *filp, poll_table *wait,
934 struct cache_detail *cd) 934 struct cache_detail *cd)
935{ 935{
936 unsigned int mask; 936 __poll_t mask;
937 struct cache_reader *rp = filp->private_data; 937 struct cache_reader *rp = filp->private_data;
938 struct cache_queue *cq; 938 struct cache_queue *cq;
939 939
940 poll_wait(filp, &queue_wait, wait); 940 poll_wait(filp, &queue_wait, wait);
941 941
942 /* always allow write */ 942
943 mask = POLLOUT | POLLWRNORM; 943 mask = EPOLLOUT | EPOLLWRNORM;
944 944
945 if (!rp) 945 if (!rp)
946 return mask; 946 return mask;
@@ -950,7 +950,7 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait,
950 for (cq= &rp->q; &cq->list != &cd->queue; 950 for (cq= &rp->q; &cq->list != &cd->queue;
951 cq = list_entry(cq->list.next, struct cache_queue, list)) 951 cq = list_entry(cq->list.next, struct cache_queue, list))
952 if (!cq->reader) { 952 if (!cq->reader) {
953 mask |= POLLIN | POLLRDNORM; 953 mask |= EPOLLIN | EPOLLRDNORM;
954 break; 954 break;
955 } 955 }
956 spin_unlock(&queue_lock); 956 spin_unlock(&queue_lock);
@@ -1501,7 +1501,7 @@ static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
1501 return cache_write(filp, buf, count, ppos, cd); 1501 return cache_write(filp, buf, count, ppos, cd);
1502} 1502}
1503 1503
1504static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) 1504static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
1505{ 1505{
1506 struct cache_detail *cd = PDE_DATA(file_inode(filp)); 1506 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1507 1507
@@ -1720,7 +1720,7 @@ static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
1720 return cache_write(filp, buf, count, ppos, cd); 1720 return cache_write(filp, buf, count, ppos, cd);
1721} 1721}
1722 1722
1723static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) 1723static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait)
1724{ 1724{
1725 struct cache_detail *cd = RPC_I(file_inode(filp))->private; 1725 struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1726 1726
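
The sunrpc/cache.c hunks move the poll methods to the __poll_t type and the EPOLL* constants, which lets sparse verify that poll masks are never mixed with ordinary integers. The shape of such a poll method is an always-writable base mask plus readable bits when queued data exists. A hedged sketch with plain user-space types, no kernel headers, and hypothetical XPOLL* names:

#include <stdio.h>

typedef unsigned int poll_t;    /* user-space stand-in for __poll_t */
#define XPOLLIN     0x001
#define XPOLLOUT    0x004
#define XPOLLRDNORM 0x040
#define XPOLLWRNORM 0x100

struct cache_like { int queued_items; };

static poll_t cache_poll_sketch(struct cache_like *cd)
{
    poll_t mask = XPOLLOUT | XPOLLWRNORM;   /* always allow write */

    if (cd->queued_items)
        mask |= XPOLLIN | XPOLLRDNORM;      /* data queued for a reader */
    return mask;
}

int main(void)
{
    struct cache_like cd = { .queued_items = 1 };

    printf("mask=%#x\n", cache_poll_sketch(&cd));
    return 0;
}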
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e2a4184f3c5d..6e432ecd7f99 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
1376EXPORT_SYMBOL_GPL(rpc_setbufsize); 1376EXPORT_SYMBOL_GPL(rpc_setbufsize);
1377 1377
1378/** 1378/**
1379 * rpc_protocol - Get transport protocol number for an RPC client
1380 * @clnt: RPC client to query
1381 *
1382 */
1383int rpc_protocol(struct rpc_clnt *clnt)
1384{
1385 int protocol;
1386
1387 rcu_read_lock();
1388 protocol = rcu_dereference(clnt->cl_xprt)->prot;
1389 rcu_read_unlock();
1390 return protocol;
1391}
1392EXPORT_SYMBOL_GPL(rpc_protocol);
1393
1394/**
1395 * rpc_net_ns - Get the network namespace for this RPC client 1379 * rpc_net_ns - Get the network namespace for this RPC client
1396 * @clnt: RPC client to query 1380 * @clnt: RPC client to query
1397 * 1381 *
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7803f3b6aa53..fc97fc3ed637 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -340,20 +340,20 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
340 return res; 340 return res;
341} 341}
342 342
343static unsigned int 343static __poll_t
344rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) 344rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
345{ 345{
346 struct inode *inode = file_inode(filp); 346 struct inode *inode = file_inode(filp);
347 struct rpc_inode *rpci = RPC_I(inode); 347 struct rpc_inode *rpci = RPC_I(inode);
348 unsigned int mask = POLLOUT | POLLWRNORM; 348 __poll_t mask = EPOLLOUT | EPOLLWRNORM;
349 349
350 poll_wait(filp, &rpci->waitq, wait); 350 poll_wait(filp, &rpci->waitq, wait);
351 351
352 inode_lock(inode); 352 inode_lock(inode);
353 if (rpci->pipe == NULL) 353 if (rpci->pipe == NULL)
354 mask |= POLLERR | POLLHUP; 354 mask |= EPOLLERR | EPOLLHUP;
355 else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) 355 else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
356 mask |= POLLIN | POLLRDNORM; 356 mask |= EPOLLIN | EPOLLRDNORM;
357 inode_unlock(inode); 357 inode_unlock(inode);
358 return mask; 358 return mask;
359} 359}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1b49edd7c4d..d9db2eab3a8d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -461,6 +461,18 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
461/* 461/*
462 * Wake up a task on a specific queue 462 * Wake up a task on a specific queue
463 */ 463 */
464void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
465 struct rpc_wait_queue *queue,
466 struct rpc_task *task)
467{
468 spin_lock_bh(&queue->lock);
469 rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
470 spin_unlock_bh(&queue->lock);
471}
472
473/*
474 * Wake up a task on a specific queue
475 */
464void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) 476void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
465{ 477{
466 spin_lock_bh(&queue->lock); 478 spin_lock_bh(&queue->lock);
@@ -755,22 +767,20 @@ static void __rpc_execute(struct rpc_task *task)
 	void (*do_action)(struct rpc_task *);
 
 	/*
-	 * Execute any pending callback first.
+	 * Perform the next FSM step or a pending callback.
+	 *
+	 * tk_action may be NULL if the task has been killed.
+	 * In particular, note that rpc_killall_tasks may
+	 * do this at any time, so beware when dereferencing.
 	 */
-	do_action = task->tk_callback;
-	task->tk_callback = NULL;
-	if (do_action == NULL) {
-		/*
-		 * Perform the next FSM step.
-		 * tk_action may be NULL if the task has been killed.
-		 * In particular, note that rpc_killall_tasks may
-		 * do this at any time, so beware when dereferencing.
-		 */
-		do_action = task->tk_action;
-		if (do_action == NULL)
-			break;
+	do_action = task->tk_action;
+	if (task->tk_callback) {
+		do_action = task->tk_callback;
+		task->tk_callback = NULL;
 	}
-	trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+	if (!do_action)
+		break;
+	trace_rpc_task_run_action(task->tk_client, task, do_action);
 	do_action(task);
 
 	/*
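Besides flattening the nesting, the rework makes the trace point report the function pointer that is actually about to run (do_action), where it previously always logged tk_action even when a callback ran instead. The dispatch logic in miniature, over a simplified task type:

/* Sketch with a toy task type: a pending one-shot callback
 * overrides the FSM's next action, and a NULL action ends the
 * loop (the task was killed).
 */
struct toy_task {
	void (*tk_action)(struct toy_task *);
	void (*tk_callback)(struct toy_task *);
};

static void toy_execute(struct toy_task *task)
{
	for (;;) {
		void (*do_action)(struct toy_task *) = task->tk_action;

		if (task->tk_callback) {	/* callback takes priority */
			do_action = task->tk_callback;
			task->tk_callback = NULL;	/* consume it */
		}
		if (!do_action)			/* killed: nothing to run */
			break;
		do_action(task);	/* may install the next action */
	}
}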
@@ -1094,12 +1104,12 @@ static int rpciod_start(void)
 	 * Create the rpciod thread and wait for it to start.
 	 */
 	dprintk("RPC:       creating workqueue rpciod\n");
-	wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
+	wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
 	if (!wq)
 		goto out_failed;
 	rpciod_workqueue = wq;
 	/* Note: highpri because network receive is latency sensitive */
-	wq = alloc_workqueue("xprtiod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+	wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0);
 	if (!wq)
 		goto free_rpciod;
 	xprtiod_workqueue = wq;
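Both queues gain WQ_UNBOUND here, so work items are no longer pinned to the CPU that queued them and the scheduler is free to place them. A sketch of allocating a queue with this flag combination ("my_wq" is illustrative):

static struct workqueue_struct *my_wq;

static int my_wq_init(void)
{
	my_wq = alloc_workqueue("my_wq",
				WQ_UNBOUND |	/* don't pin work to the submitting CPU */
				WQ_MEM_RECLAIM,	/* guarantee forward progress via a rescuer */
				0);		/* default max_active */
	return my_wq ? 0 : -ENOMEM;
}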
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ff8e06cd067e..943f2a745cd5 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -338,8 +338,8 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
 	rqstp->rq_xprt_hlen = 0;
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
-			     msg.msg_flags);
+	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+	len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
 	/* If we read a full record, then assume there may be more
 	 * data to read (stream based sockets only!)
 	 */
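kernel_recvmsg() is replaced by setting up msg.msg_iter explicitly and calling sock_recvmsg(), which avoids the set_fs() dance inside kernel_recvmsg(). The replacement idiom, using the 4.16-era iov_iter API where the direction and ITER_KVEC were OR'ed together ("recv_into_kvec" is a hypothetical helper):

static int recv_into_kvec(struct socket *sock, void *buf, size_t len)
{
	struct kvec vec = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

	/* point the iterator at our kernel buffer... */
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len);
	/* ...and receive directly through the socket layer */
	return sock_recvmsg(sock, &msg, msg.msg_flags);
}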
@@ -384,25 +384,11 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp,
 static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 				unsigned int rcv)
 {
-#if 0
-	mm_segment_t	oldfs;
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
-			(char*)&snd, sizeof(snd));
-	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
-			(char*)&rcv, sizeof(rcv));
-#else
-	/* sock_setsockopt limits use to sysctl_?mem_max,
-	 * which isn't acceptable.  Until that is made conditional
-	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
-	 * DaveM said I could!
-	 */
 	lock_sock(sock->sk);
 	sock->sk->sk_sndbuf = snd * 2;
 	sock->sk->sk_rcvbuf = rcv * 2;
 	sock->sk->sk_write_space(sock->sk);
 	release_sock(sock->sk);
-#endif
 }
 
 static int svc_sock_secure_port(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 33b74fd84051..8f0ad4f268da 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -517,7 +517,8 @@ void xprt_write_space(struct rpc_xprt *xprt)
 	if (xprt->snd_task) {
 		dprintk("RPC:       write space: waking waiting task on "
 				"xprt %p\n", xprt);
-		rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task);
+		rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
+				&xprt->pending, xprt->snd_task);
 	}
 	spin_unlock_bh(&xprt->transport_lock);
 }
@@ -940,8 +941,8 @@ static void xprt_timer(struct rpc_task *task)
 
 	if (task->tk_status != -ETIMEDOUT)
 		return;
-	dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
 
+	trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
 	if (!req->rq_reply_bytes_recvd) {
 		if (xprt->ops->timer)
 			xprt->ops->timer(xprt, task);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb3518a..ed1a4a3065ee 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
 	req = rpcrdma_create_req(r_xprt);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-	__set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
 
 	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
 				  DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ out_fail:
 static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
 				 unsigned int count)
 {
-	struct rpcrdma_rep *rep;
 	int rc = 0;
 
 	while (count--) {
-		rep = rpcrdma_create_rep(r_xprt);
-		if (IS_ERR(rep)) {
-			pr_err("RPC:       %s: reply buffer alloc failed\n",
-			       __func__);
-			rc = PTR_ERR(rep);
+		rc = rpcrdma_create_rep(r_xprt);
+		if (rc)
 			break;
-		}
-
-		rpcrdma_recv_buffer_put(rep);
 	}
-
 	return rc;
 }
 
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 	rqst->rq_xprt = &r_xprt->rx_xprt;
 	INIT_LIST_HEAD(&rqst->rq_list);
 	INIT_LIST_HEAD(&rqst->rq_bc_list);
+	__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
 	if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
 		goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 
 	buffer->rb_bc_srv_max_requests = reqs;
 	request_module("svcrdma");
-
+	trace_xprtrdma_cb_setup(r_xprt, reqs);
 	return 0;
 
 out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
 	return maxmsg - RPCRDMA_HDRLEN_MIN;
 }
 
-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 	if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
 				      &rqst->rq_snd_buf, rpcrdma_noch))
 		return -EIO;
+
+	trace_xprtrdma_cb_reply(rqst);
+	return 0;
+}
+
+/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ *	%0 if the RPC message has been sent
+ *	%-ENOTCONN if the caller should reconnect and call again
+ *	%-EIO if a permanent error occurred and the request was not
+ *		sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	int rc;
+
+	if (!xprt_connected(rqst->rq_xprt))
+		goto drop_connection;
+
+	rc = rpcrdma_bc_marshal_reply(rqst);
+	if (rc < 0)
+		goto failed_marshal;
+
+	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+		goto drop_connection;
 	return 0;
+
+failed_marshal:
+	if (rc != -ENOTCONN)
+		return rc;
+drop_connection:
+	xprt_disconnect_done(rqst->rq_xprt);
+	return -ENOTCONN;
 }
 
 /**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 	dprintk("RPC:       %s: freeing rqst %p (req %p)\n",
 		__func__, rqst, rpcr_to_rdmar(rqst));
 
-	smp_mb__before_atomic();
-	WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
-	clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-	smp_mb__after_atomic();
-
 	spin_lock_bh(&xprt->bc_pa_lock);
 	list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
 	spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 
 /**
  * rpcrdma_bc_receive_call - Handle a backward direction call
- * @xprt: transport receiving the call
+ * @r_xprt: transport receiving the call
  * @rep: receive buffer containing the call
  *
  * Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 					    struct rpc_rqst, rq_bc_pa_list);
 	list_del(&rqst->rq_bc_pa_list);
 	spin_unlock(&xprt->bc_pa_lock);
-	dprintk("RPC:       %s: using rqst %p\n", __func__, rqst);
 
 	/* Prepare rqst */
 	rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	rqst->rq_xid = *p;
 
 	rqst->rq_private_buf.len = size;
-	set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
 	buf = &rqst->rq_rcv_buf;
 	memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	 * the Upper Layer is done decoding it.
 	 */
 	req = rpcr_to_rdmar(rqst);
-	dprintk("RPC:       %s: attaching rep %p to req %p\n",
-		__func__, rep, req);
 	req->rl_reply = rep;
-
-	/* Defeat the retransmit detection logic in send_request */
-	req->rl_connect_cookie = 0;
+	trace_xprtrdma_cb_call(rqst);
 
 	/* Queue rqst for ULP's callback service */
 	bc_serv = xprt->bc_serv;
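The new xprt_rdma_bc_send_reply() folds marshalling and posting behind a two-label error ladder: -ENOTCONN from any stage funnels into a disconnect, anything else is returned to the caller untouched. The shape of that funnel, as a sketch ("marshal" and "post" stand in for the real helpers):

static int send_reply(struct rpc_rqst *rqst)
{
	int rc;

	if (!xprt_connected(rqst->rq_xprt))
		goto drop_connection;

	rc = marshal(rqst);		/* hypothetical helper */
	if (rc < 0)
		goto failed_marshal;

	if (post(rqst))			/* hypothetical helper */
		goto drop_connection;
	return 0;

failed_marshal:
	if (rc != -ENOTCONN)
		return rc;		/* permanent error, e.g. -EIO */
drop_connection:
	xprt_disconnect_done(rqst->rq_xprt);
	return -ENOTCONN;		/* caller reconnects and retries */
}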
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c7ff98..d5f95bb39300 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
  */
 
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
 }
 
 static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
 {
 	static struct ib_fmr_attr fmr_attr = {
 		.max_pages	= RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
 		.page_shift	= PAGE_SHIFT
 	};
 
-	mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
-				       sizeof(u64), GFP_KERNEL);
-	if (!mw->fmr.fm_physaddrs)
+	mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+				       sizeof(u64), GFP_KERNEL);
+	if (!mr->fmr.fm_physaddrs)
 		goto out_free;
 
-	mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
-			    sizeof(*mw->mw_sg), GFP_KERNEL);
-	if (!mw->mw_sg)
+	mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+			    sizeof(*mr->mr_sg), GFP_KERNEL);
+	if (!mr->mr_sg)
 		goto out_free;
 
-	sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);
+	sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
 
-	mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
-				     &fmr_attr);
-	if (IS_ERR(mw->fmr.fm_mr))
+	mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+				     &fmr_attr);
+	if (IS_ERR(mr->fmr.fm_mr))
 		goto out_fmr_err;
 
 	return 0;
 
 out_fmr_err:
 	dprintk("RPC:       %s: ib_alloc_fmr returned %ld\n", __func__,
-		PTR_ERR(mw->fmr.fm_mr));
+		PTR_ERR(mr->fmr.fm_mr));
 
 out_free:
-	kfree(mw->mw_sg);
-	kfree(mw->fmr.fm_physaddrs);
+	kfree(mr->mr_sg);
+	kfree(mr->fmr.fm_physaddrs);
 	return -ENOMEM;
 }
 
 static int
-__fmr_unmap(struct rpcrdma_mw *mw)
+__fmr_unmap(struct rpcrdma_mr *mr)
 {
 	LIST_HEAD(l);
 	int rc;
 
-	list_add(&mw->fmr.fm_mr->list, &l);
+	list_add(&mr->fmr.fm_mr->list, &l);
 	rc = ib_unmap_fmr(&l);
-	list_del(&mw->fmr.fm_mr->list);
+	list_del(&mr->fmr.fm_mr->list);
 	return rc;
 }
 
 static void
-fmr_op_release_mr(struct rpcrdma_mw *r)
+fmr_op_release_mr(struct rpcrdma_mr *mr)
 {
 	LIST_HEAD(unmap_list);
 	int rc;
 
 	/* Ensure MW is not on any rl_registered list */
-	if (!list_empty(&r->mw_list))
-		list_del(&r->mw_list);
+	if (!list_empty(&mr->mr_list))
+		list_del(&mr->mr_list);
 
-	kfree(r->fmr.fm_physaddrs);
-	kfree(r->mw_sg);
+	kfree(mr->fmr.fm_physaddrs);
+	kfree(mr->mr_sg);
 
 	/* In case this one was left mapped, try to unmap it
 	 * to prevent dealloc_fmr from failing with EBUSY
 	 */
-	rc = __fmr_unmap(r);
+	rc = __fmr_unmap(mr);
 	if (rc)
 		pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
-		       r, rc);
+		       mr, rc);
 
-	rc = ib_dealloc_fmr(r->fmr.fm_mr);
+	rc = ib_dealloc_fmr(mr->fmr.fm_mr);
 	if (rc)
 		pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
-		       r, rc);
+		       mr, rc);
 
-	kfree(r);
+	kfree(mr);
 }
 
 /* Reset of a single FMR.
  */
 static void
-fmr_op_recover_mr(struct rpcrdma_mw *mw)
+fmr_op_recover_mr(struct rpcrdma_mr *mr)
 {
-	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
 	int rc;
 
 	/* ORDER: invalidate first */
-	rc = __fmr_unmap(mw);
-
-	/* ORDER: then DMA unmap */
-	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
-			mw->mw_sg, mw->mw_nents, mw->mw_dir);
+	rc = __fmr_unmap(mr);
 	if (rc)
 		goto out_release;
 
-	rpcrdma_put_mw(r_xprt, mw);
+	/* ORDER: then DMA unmap */
+	rpcrdma_mr_unmap_and_put(mr);
+
 	r_xprt->rx_stats.mrs_recovered++;
 	return;
 
 out_release:
-	pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
+	pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
 	r_xprt->rx_stats.mrs_orphaned++;
 
-	spin_lock(&r_xprt->rx_buf.rb_mwlock);
-	list_del(&mw->mw_all);
-	spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+	trace_xprtrdma_dma_unmap(mr);
+	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+			mr->mr_sg, mr->mr_nents, mr->mr_dir);
+
+	spin_lock(&r_xprt->rx_buf.rb_mrlock);
+	list_del(&mr->mr_all);
+	spin_unlock(&r_xprt->rx_buf.rb_mrlock);
 
-	fmr_op_release_mr(mw);
+	fmr_op_release_mr(mr);
 }
 
 static int
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
  */
 static struct rpcrdma_mr_seg *
 fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
-	   int nsegs, bool writing, struct rpcrdma_mw **out)
+	   int nsegs, bool writing, struct rpcrdma_mr **out)
 {
 	struct rpcrdma_mr_seg *seg1 = seg;
 	int len, pageoff, i, rc;
-	struct rpcrdma_mw *mw;
+	struct rpcrdma_mr *mr;
 	u64 *dma_pages;
 
-	mw = rpcrdma_get_mw(r_xprt);
-	if (!mw)
+	mr = rpcrdma_mr_get(r_xprt);
+	if (!mr)
 		return ERR_PTR(-ENOBUFS);
 
 	pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		nsegs = RPCRDMA_MAX_FMR_SGES;
 	for (i = 0; i < nsegs;) {
 		if (seg->mr_page)
-			sg_set_page(&mw->mw_sg[i],
+			sg_set_page(&mr->mr_sg[i],
 				    seg->mr_page,
 				    seg->mr_len,
 				    offset_in_page(seg->mr_offset));
 		else
-			sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
 				   seg->mr_len);
 		len += seg->mr_len;
 		++seg;
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		     offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
-	mw->mw_dir = rpcrdma_data_dir(writing);
+	mr->mr_dir = rpcrdma_data_dir(writing);
 
-	mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
-				     mw->mw_sg, i, mw->mw_dir);
-	if (!mw->mw_nents)
+	mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+				     mr->mr_sg, i, mr->mr_dir);
+	if (!mr->mr_nents)
 		goto out_dmamap_err;
 
-	for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
-		dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
-	rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
+	for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
+		dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
+	rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
 			     dma_pages[0]);
 	if (rc)
 		goto out_maperr;
 
-	mw->mw_handle = mw->fmr.fm_mr->rkey;
-	mw->mw_length = len;
-	mw->mw_offset = dma_pages[0] + pageoff;
+	mr->mr_handle = mr->fmr.fm_mr->rkey;
+	mr->mr_length = len;
+	mr->mr_offset = dma_pages[0] + pageoff;
 
-	*out = mw;
+	*out = mr;
 	return seg;
 
 out_dmamap_err:
 	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
-	       mw->mw_sg, i);
-	rpcrdma_put_mw(r_xprt, mw);
+	       mr->mr_sg, i);
+	rpcrdma_mr_put(mr);
 	return ERR_PTR(-EIO);
 
 out_maperr:
 	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
 	       len, (unsigned long long)dma_pages[0],
-	       pageoff, mw->mw_nents, rc);
-	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
-			mw->mw_sg, mw->mw_nents, mw->mw_dir);
-	rpcrdma_put_mw(r_xprt, mw);
+	       pageoff, mr->mr_nents, rc);
+	rpcrdma_mr_unmap_and_put(mr);
 	return ERR_PTR(-EIO);
 }
 
@@ -256,13 +256,13 @@ out_maperr:
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
  * function empties the list.
  */
 static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
 {
-	struct rpcrdma_mw *mw;
+	struct rpcrdma_mr *mr;
 	LIST_HEAD(unmap_list);
 	int rc;
 
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	 * ib_unmap_fmr() is slow, so use a single call instead
 	 * of one call per mapped FMR.
 	 */
-	list_for_each_entry(mw, mws, mw_list) {
+	list_for_each_entry(mr, mrs, mr_list) {
 		dprintk("RPC:       %s: unmapping fmr %p\n",
-			__func__, &mw->fmr);
-		list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+			__func__, &mr->fmr);
+		trace_xprtrdma_localinv(mr);
+		list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
 	}
 	r_xprt->rx_stats.local_inv_needed++;
 	rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
 	 */
-	while (!list_empty(mws)) {
-		mw = rpcrdma_pop_mw(mws);
-		dprintk("RPC:       %s: DMA unmapping fmr %p\n",
-			__func__, &mw->fmr);
-		list_del(&mw->fmr.fm_mr->list);
-		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
-				mw->mw_sg, mw->mw_nents, mw->mw_dir);
-		rpcrdma_put_mw(r_xprt, mw);
+	while (!list_empty(mrs)) {
+		mr = rpcrdma_mr_pop(mrs);
+		list_del(&mr->fmr.fm_mr->list);
+		rpcrdma_mr_unmap_and_put(mr);
 	}
 
 	return;
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 out_reset:
 	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
 
-	while (!list_empty(mws)) {
-		mw = rpcrdma_pop_mw(mws);
-		list_del(&mw->fmr.fm_mr->list);
-		fmr_op_recover_mr(mw);
+	while (!list_empty(mrs)) {
+		mr = rpcrdma_mr_pop(mrs);
+		list_del(&mr->fmr.fm_mr->list);
+		fmr_op_recover_mr(mr);
 	}
 }
 
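As the comment in fmr_op_unmap_sync() notes, ib_unmap_fmr() is expensive, so the implementation collects every FMR onto one temporary list and unmaps the whole batch with a single verb call. The batching idea in isolation, as a sketch over the structures used in this file:

/* Sketch of the collect-then-flush batching in fmr_op_unmap_sync():
 * one ib_unmap_fmr() for many FMRs instead of one call each.
 */
static int unmap_all(struct list_head *mrs)	/* list of rpcrdma_mr */
{
	struct rpcrdma_mr *mr;
	LIST_HEAD(unmap_list);

	list_for_each_entry(mr, mrs, mr_list)
		list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);

	return ib_unmap_fmr(&unmap_list);	/* one slow call, whole batch */
}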
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
  */
 
 /* Lightweight memory registration using Fast Registration Work
- * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ * Requests (FRWR).
  *
  * FRWR features ordered asynchronous registration and deregistration
  * of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
 /* Normal operation
  *
  * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
  * Memory Region is invalidated using a LOCAL_INV Work Request
- * (frmr_op_unmap).
+ * (frwr_op_unmap_sync).
  *
  * Typically these Work Requests are not signaled, and neither are RDMA
  * SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
  *
  * As an optimization, frwr_op_unmap marks MRs INVALID before the
  * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mws immediately so that no work (like managing a linked list
+ * rb_mrs immediately so that no work (like managing a linked list
  * under a spinlock) is needed in the completion upcall.
  *
  * But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
  * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
  * with ib_dereg_mr and then are re-initialized. Because MR recovery
  * allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mrs list when recovery is
+ * recovered MRs are placed back on the rb_mrs list when recovery is
  * complete. frwr_op_map allocates another MR for the current RPC while
  * the broken MR is reset.
  *
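The comments above describe an MR lifecycle: INVALID MRs are safe to hand out, VALID MRs are (or may be) registered, and two FLUSHED states record which kind of Work Request a disconnect flushed. A sketch of that state machine; the enum mirrors the FRWR_* names used in this file, while the helper is purely illustrative:

enum frwr_state {
	FRWR_IS_INVALID,	/* ready for a fresh registration */
	FRWR_IS_VALID,		/* registered; LOCAL_INV may be in flight */
	FRWR_FLUSHED_FR,	/* FastReg WR flushed by a disconnect */
	FRWR_FLUSHED_LI,	/* LocalInv WR flushed by a disconnect */
};

/* Only an INVALID MR may be handed out by frwr_op_map(); anything
 * else found on the free list is sent to the recovery workqueue.
 */
static int mr_is_usable(enum frwr_state state)
{
	return state == FRWR_IS_INVALID;
}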
@@ -96,26 +96,26 @@ out_not_supported:
 }
 
 static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
 {
-	unsigned int depth = ia->ri_max_frmr_depth;
-	struct rpcrdma_frmr *f = &r->frmr;
+	unsigned int depth = ia->ri_max_frwr_depth;
+	struct rpcrdma_frwr *frwr = &mr->frwr;
 	int rc;
 
-	f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
-	if (IS_ERR(f->fr_mr))
+	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+	if (IS_ERR(frwr->fr_mr))
 		goto out_mr_err;
 
-	r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
-	if (!r->mw_sg)
+	mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
+	if (!mr->mr_sg)
 		goto out_list_err;
 
-	sg_init_table(r->mw_sg, depth);
-	init_completion(&f->fr_linv_done);
+	sg_init_table(mr->mr_sg, depth);
+	init_completion(&frwr->fr_linv_done);
 	return 0;
 
 out_mr_err:
-	rc = PTR_ERR(f->fr_mr);
+	rc = PTR_ERR(frwr->fr_mr);
 	dprintk("RPC:       %s: ib_alloc_mr status %i\n",
 		__func__, rc);
 	return rc;
@@ -124,83 +124,85 @@ out_list_err:
 	rc = -ENOMEM;
 	dprintk("RPC:       %s: sg allocation failure\n",
 		__func__);
-	ib_dereg_mr(f->fr_mr);
+	ib_dereg_mr(frwr->fr_mr);
 	return rc;
 }
 
 static void
-frwr_op_release_mr(struct rpcrdma_mw *r)
+frwr_op_release_mr(struct rpcrdma_mr *mr)
 {
 	int rc;
 
-	/* Ensure MW is not on any rl_registered list */
-	if (!list_empty(&r->mw_list))
-		list_del(&r->mw_list);
+	/* Ensure MR is not on any rl_registered list */
+	if (!list_empty(&mr->mr_list))
+		list_del(&mr->mr_list);
 
-	rc = ib_dereg_mr(r->frmr.fr_mr);
+	rc = ib_dereg_mr(mr->frwr.fr_mr);
 	if (rc)
 		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
-		       r, rc);
-	kfree(r->mw_sg);
-	kfree(r);
+		       mr, rc);
+	kfree(mr->mr_sg);
+	kfree(mr);
 }
 
 static int
-__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
 {
-	struct rpcrdma_frmr *f = &r->frmr;
+	struct rpcrdma_frwr *frwr = &mr->frwr;
 	int rc;
 
-	rc = ib_dereg_mr(f->fr_mr);
+	rc = ib_dereg_mr(frwr->fr_mr);
 	if (rc) {
 		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
-			rc, r);
+			rc, mr);
 		return rc;
 	}
 
-	f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
-			       ia->ri_max_frmr_depth);
-	if (IS_ERR(f->fr_mr)) {
+	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
+				  ia->ri_max_frwr_depth);
+	if (IS_ERR(frwr->fr_mr)) {
 		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
-			PTR_ERR(f->fr_mr), r);
-		return PTR_ERR(f->fr_mr);
+			PTR_ERR(frwr->fr_mr), mr);
+		return PTR_ERR(frwr->fr_mr);
 	}
 
-	dprintk("RPC:       %s: recovered FRMR %p\n", __func__, f);
-	f->fr_state = FRMR_IS_INVALID;
+	dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);
+	frwr->fr_state = FRWR_IS_INVALID;
 	return 0;
 }
 
-/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
  */
 static void
-frwr_op_recover_mr(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mr *mr)
 {
-	enum rpcrdma_frmr_state state = mw->frmr.fr_state;
-	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+	enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	int rc;
 
-	rc = __frwr_reset_mr(ia, mw);
-	if (state != FRMR_FLUSHED_LI)
+	rc = __frwr_mr_reset(ia, mr);
+	if (state != FRWR_FLUSHED_LI) {
+		trace_xprtrdma_dma_unmap(mr);
 		ib_dma_unmap_sg(ia->ri_device,
-				mw->mw_sg, mw->mw_nents, mw->mw_dir);
+				mr->mr_sg, mr->mr_nents, mr->mr_dir);
+	}
 	if (rc)
 		goto out_release;
 
-	rpcrdma_put_mw(r_xprt, mw);
+	rpcrdma_mr_put(mr);
 	r_xprt->rx_stats.mrs_recovered++;
 	return;
 
 out_release:
-	pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
+	pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
 	r_xprt->rx_stats.mrs_orphaned++;
 
-	spin_lock(&r_xprt->rx_buf.rb_mwlock);
-	list_del(&mw->mw_all);
-	spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+	spin_lock(&r_xprt->rx_buf.rb_mrlock);
+	list_del(&mr->mr_all);
+	spin_unlock(&r_xprt->rx_buf.rb_mrlock);
 
-	frwr_op_release_mr(mw);
+	frwr_op_release_mr(mr);
 }
 
 static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
 		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
 
-	ia->ri_max_frmr_depth =
+	ia->ri_max_frwr_depth =
 			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
 			      attrs->max_fast_reg_page_list_len);
 	dprintk("RPC:       %s: device's max FR page list len = %u\n",
-		__func__, ia->ri_max_frmr_depth);
-
-	/* Add room for frmr register and invalidate WRs.
-	 * 1. FRMR reg WR for head
-	 * 2. FRMR invalidate WR for head
-	 * 3. N FRMR reg WRs for pagelist
-	 * 4. N FRMR invalidate WRs for pagelist
-	 * 5. FRMR reg WR for tail
-	 * 6. FRMR invalidate WR for tail
+		__func__, ia->ri_max_frwr_depth);
+
+	/* Add room for frwr register and invalidate WRs.
+	 * 1. FRWR reg WR for head
+	 * 2. FRWR invalidate WR for head
+	 * 3. N FRWR reg WRs for pagelist
+	 * 4. N FRWR invalidate WRs for pagelist
+	 * 5. FRWR reg WR for tail
+	 * 6. FRWR invalidate WR for tail
 	 * 7. The RDMA_SEND WR
 	 */
 	depth = 7;
 
-	/* Calculate N if the device max FRMR depth is smaller than
+	/* Calculate N if the device max FRWR depth is smaller than
 	 * RPCRDMA_MAX_DATA_SEGS.
 	 */
-	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
-		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
+		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
 		do {
-			depth += 2; /* FRMR reg + invalidate */
-			delta -= ia->ri_max_frmr_depth;
+			depth += 2; /* FRWR reg + invalidate */
+			delta -= ia->ri_max_frwr_depth;
 		} while (delta > 0);
 	}
 
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 	}
 
 	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
-				ia->ri_max_frmr_depth);
+				ia->ri_max_frwr_depth);
 	return 0;
 }
 
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
 	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
+		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
 }
 
 static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
 static void
 frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct rpcrdma_frmr *frmr;
-	struct ib_cqe *cqe;
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+			container_of(cqe, struct rpcrdma_frwr, fr_cqe);
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	if (wc->status != IB_WC_SUCCESS) {
-		cqe = wc->wr_cqe;
-		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-		frmr->fr_state = FRMR_FLUSHED_FR;
+		frwr->fr_state = FRWR_FLUSHED_FR;
 		__frwr_sendcompletion_flush(wc, "fastreg");
 	}
+	trace_xprtrdma_wc_fastreg(wc, frwr);
 }
 
 /**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 static void
 frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct rpcrdma_frmr *frmr;
-	struct ib_cqe *cqe;
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+						 fr_cqe);
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	if (wc->status != IB_WC_SUCCESS) {
-		cqe = wc->wr_cqe;
-		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-		frmr->fr_state = FRMR_FLUSHED_LI;
+		frwr->fr_state = FRWR_FLUSHED_LI;
 		__frwr_sendcompletion_flush(wc, "localinv");
 	}
+	trace_xprtrdma_wc_li(wc, frwr);
 }
 
 /**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
 static void
 frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct rpcrdma_frmr *frmr;
-	struct ib_cqe *cqe;
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+						 fr_cqe);
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
-	cqe = wc->wr_cqe;
-	frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
 	if (wc->status != IB_WC_SUCCESS) {
-		frmr->fr_state = FRMR_FLUSHED_LI;
+		frwr->fr_state = FRWR_FLUSHED_LI;
 		__frwr_sendcompletion_flush(wc, "localinv");
 	}
-	complete(&frmr->fr_linv_done);
+	complete(&frwr->fr_linv_done);
+	trace_xprtrdma_wc_li_wake(wc, frwr);
 }
 
 /* Post a REG_MR Work Request to register a memory region
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
  */
 static struct rpcrdma_mr_seg *
 frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
-	    int nsegs, bool writing, struct rpcrdma_mw **out)
+	    int nsegs, bool writing, struct rpcrdma_mr **out)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
-	struct rpcrdma_mw *mw;
-	struct rpcrdma_frmr *frmr;
-	struct ib_mr *mr;
+	struct rpcrdma_frwr *frwr;
+	struct rpcrdma_mr *mr;
+	struct ib_mr *ibmr;
 	struct ib_reg_wr *reg_wr;
 	struct ib_send_wr *bad_wr;
 	int rc, i, n;
 	u8 key;
 
-	mw = NULL;
+	mr = NULL;
 	do {
-		if (mw)
-			rpcrdma_defer_mr_recovery(mw);
-		mw = rpcrdma_get_mw(r_xprt);
-		if (!mw)
+		if (mr)
+			rpcrdma_mr_defer_recovery(mr);
+		mr = rpcrdma_mr_get(r_xprt);
+		if (!mr)
 			return ERR_PTR(-ENOBUFS);
-	} while (mw->frmr.fr_state != FRMR_IS_INVALID);
-	frmr = &mw->frmr;
-	frmr->fr_state = FRMR_IS_VALID;
-	mr = frmr->fr_mr;
-	reg_wr = &frmr->fr_regwr;
-
-	if (nsegs > ia->ri_max_frmr_depth)
-		nsegs = ia->ri_max_frmr_depth;
+	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
+	frwr = &mr->frwr;
+	frwr->fr_state = FRWR_IS_VALID;
+
+	if (nsegs > ia->ri_max_frwr_depth)
+		nsegs = ia->ri_max_frwr_depth;
 	for (i = 0; i < nsegs;) {
 		if (seg->mr_page)
-			sg_set_page(&mw->mw_sg[i],
+			sg_set_page(&mr->mr_sg[i],
 				    seg->mr_page,
 				    seg->mr_len,
 				    offset_in_page(seg->mr_offset));
 		else
-			sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
 				   seg->mr_len);
 
 		++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		     offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
-	mw->mw_dir = rpcrdma_data_dir(writing);
+	mr->mr_dir = rpcrdma_data_dir(writing);
 
-	mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
-	if (!mw->mw_nents)
+	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+	if (!mr->mr_nents)
 		goto out_dmamap_err;
 
-	n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
-	if (unlikely(n != mw->mw_nents))
+	ibmr = frwr->fr_mr;
+	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+	if (unlikely(n != mr->mr_nents))
 		goto out_mapmr_err;
 
-	dprintk("RPC:       %s: Using frmr %p to map %u segments (%llu bytes)\n",
-		__func__, frmr, mw->mw_nents, mr->length);
-
-	key = (u8)(mr->rkey & 0x000000FF);
-	ib_update_fast_reg_key(mr, ++key);
+	key = (u8)(ibmr->rkey & 0x000000FF);
+	ib_update_fast_reg_key(ibmr, ++key);
 
+	reg_wr = &frwr->fr_regwr;
 	reg_wr->wr.next = NULL;
 	reg_wr->wr.opcode = IB_WR_REG_MR;
-	frmr->fr_cqe.done = frwr_wc_fastreg;
-	reg_wr->wr.wr_cqe = &frmr->fr_cqe;
+	frwr->fr_cqe.done = frwr_wc_fastreg;
+	reg_wr->wr.wr_cqe = &frwr->fr_cqe;
 	reg_wr->wr.num_sge = 0;
 	reg_wr->wr.send_flags = 0;
-	reg_wr->mr = mr;
-	reg_wr->key = mr->rkey;
+	reg_wr->mr = ibmr;
+	reg_wr->key = ibmr->rkey;
 	reg_wr->access = writing ?
 			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
 			 IB_ACCESS_REMOTE_READ;
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	if (rc)
 		goto out_senderr;
 
-	mw->mw_handle = mr->rkey;
-	mw->mw_length = mr->length;
-	mw->mw_offset = mr->iova;
+	mr->mr_handle = ibmr->rkey;
+	mr->mr_length = ibmr->length;
+	mr->mr_offset = ibmr->iova;
 
-	*out = mw;
+	*out = mr;
 	return seg;
 
 out_dmamap_err:
 	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
-	       mw->mw_sg, i);
-	frmr->fr_state = FRMR_IS_INVALID;
-	rpcrdma_put_mw(r_xprt, mw);
+	       mr->mr_sg, i);
+	frwr->fr_state = FRWR_IS_INVALID;
+	rpcrdma_mr_put(mr);
 	return ERR_PTR(-EIO);
 
 out_mapmr_err:
 	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
-	       frmr->fr_mr, n, mw->mw_nents);
-	rpcrdma_defer_mr_recovery(mw);
+	       frwr->fr_mr, n, mr->mr_nents);
+	rpcrdma_mr_defer_recovery(mr);
 	return ERR_PTR(-EIO);
 
 out_senderr:
-	pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
-	rpcrdma_defer_mr_recovery(mw);
+	pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
+	rpcrdma_mr_defer_recovery(mr);
 	return ERR_PTR(-ENOTCONN);
 }
 
+/* Handle a remotely invalidated mr on the @mrs list
+ */
+static void
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+{
+	struct rpcrdma_mr *mr;
+
+	list_for_each_entry(mr, mrs, mr_list)
+		if (mr->mr_handle == rep->rr_inv_rkey) {
+			list_del(&mr->mr_list);
+			trace_xprtrdma_remoteinv(mr);
+			mr->frwr.fr_state = FRWR_IS_INVALID;
+			rpcrdma_mr_unmap_and_put(mr);
+			break;	/* only one invalidated MR per RPC */
+		}
+}
+
 /* Invalidate all memory regions that were registered for "req".
  *
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
  * function empties the list.
  */
 static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
 {
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	struct rpcrdma_frmr *f;
-	struct rpcrdma_mw *mw;
+	struct rpcrdma_frwr *frwr;
+	struct rpcrdma_mr *mr;
 	int count, rc;
 
 	/* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	 * Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
 	 */
-	f = NULL;
+	frwr = NULL;
 	count = 0;
 	prev = &first;
-	list_for_each_entry(mw, mws, mw_list) {
-		mw->frmr.fr_state = FRMR_IS_INVALID;
+	list_for_each_entry(mr, mrs, mr_list) {
+		mr->frwr.fr_state = FRWR_IS_INVALID;
 
-		if (mw->mw_flags & RPCRDMA_MW_F_RI)
-			continue;
-
-		f = &mw->frmr;
-		dprintk("RPC:       %s: invalidating frmr %p\n",
-			__func__, f);
-
-		f->fr_cqe.done = frwr_wc_localinv;
-		last = &f->fr_invwr;
+		frwr = &mr->frwr;
+		trace_xprtrdma_localinv(mr);
+
+		frwr->fr_cqe.done = frwr_wc_localinv;
+		last = &frwr->fr_invwr;
 		memset(last, 0, sizeof(*last));
-		last->wr_cqe = &f->fr_cqe;
+		last->wr_cqe = &frwr->fr_cqe;
 		last->opcode = IB_WR_LOCAL_INV;
-		last->ex.invalidate_rkey = mw->mw_handle;
+		last->ex.invalidate_rkey = mr->mr_handle;
 		count++;
 
 		*prev = last;
 		prev = &last->next;
 	}
-	if (!f)
+	if (!frwr)
 		goto unmap;
 
 	/* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	 * are complete.
 	 */
 	last->send_flags = IB_SEND_SIGNALED;
-	f->fr_cqe.done = frwr_wc_localinv_wake;
-	reinit_completion(&f->fr_linv_done);
+	frwr->fr_cqe.done = frwr_wc_localinv_wake;
+	reinit_completion(&frwr->fr_linv_done);
 
 	/* Transport disconnect drains the receive CQ before it
 	 * replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	bad_wr = NULL;
 	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
 	if (bad_wr != first)
-		wait_for_completion(&f->fr_linv_done);
+		wait_for_completion(&frwr->fr_linv_done);
 	if (rc)
 		goto reset_mrs;
 
 	/* ORDER: Now DMA unmap all of the MRs, and return
-	 * them to the free MW list.
+	 * them to the free MR list.
 	 */
 unmap:
-	while (!list_empty(mws)) {
-		mw = rpcrdma_pop_mw(mws);
-		dprintk("RPC:       %s: DMA unmapping frmr %p\n",
-			__func__, &mw->frmr);
-		ib_dma_unmap_sg(ia->ri_device,
-				mw->mw_sg, mw->mw_nents, mw->mw_dir);
-		rpcrdma_put_mw(r_xprt, mw);
+	while (!list_empty(mrs)) {
+		mr = rpcrdma_mr_pop(mrs);
+		rpcrdma_mr_unmap_and_put(mr);
 	}
 	return;
 
 reset_mrs:
-	pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+	pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
 
 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
 	 * get posted.
 	 */
 	while (bad_wr) {
-		f = container_of(bad_wr, struct rpcrdma_frmr,
-				 fr_invwr);
-		mw = container_of(f, struct rpcrdma_mw, frmr);
+		frwr = container_of(bad_wr, struct rpcrdma_frwr,
+				    fr_invwr);
+		mr = container_of(frwr, struct rpcrdma_mr, frwr);
 
-		__frwr_reset_mr(ia, mw);
+		__frwr_mr_reset(ia, mr);
 
 		bad_wr = bad_wr->next;
 	}
@@ -553,6 +561,7 @@ reset_mrs:
 
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
+	.ro_reminv			= frwr_op_reminv,
 	.ro_unmap_sync			= frwr_op_unmap_sync,
 	.ro_recover_mr			= frwr_op_recover_mr,
 	.ro_open			= frwr_op_open,
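Registration strategies are dispatched through this ops table, and the new ro_reminv hook is populated only for FRWR, since FMR cannot benefit from Remote Invalidation. A sketch of the dispatch pattern under that assumption, with simplified stand-in types ("toy_memreg_ops", "toy_complete"):

struct toy_memreg_ops {
	void (*ro_unmap_sync)(struct list_head *mrs);
	void (*ro_reminv)(struct list_head *mrs);	/* optional */
};

static void toy_complete(const struct toy_memreg_ops *ops,
			 struct list_head *mrs)
{
	if (ops->ro_reminv)	/* peer may have invalidated one MR already */
		ops->ro_reminv(mrs);
	if (!list_empty(mrs))	/* invalidate the remainder locally */
		ops->ro_unmap_sync(mrs);
}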
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 560712bd9fa2..a762d192372b 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,18 +1,20 @@
 /*
- * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
  */
 
 /* rpcrdma.ko module initialization
  */
 
+#include <linux/types.h>
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sunrpc/svc_rdma.h>
-#include "xprt_rdma.h"
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY	RPCDBG_TRANS
-#endif
+#include <asm/swab.h>
+
+#define CREATE_TRACE_POINTS
+#include "xprt_rdma.h"
 
 MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
 MODULE_DESCRIPTION("RPC/RDMA Transport");
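This hunk follows the standard kernel tracepoint convention: exactly one translation unit defines CREATE_TRACE_POINTS before including the trace header, which turns the tracepoint declarations in that header into definitions; every other user includes the header bare. The convention in miniature, with hypothetical file names:

/* foo_main.c -- the single .c file that emits the definitions */
#define CREATE_TRACE_POINTS
#include "foo_trace.h"

/* foo_other.c -- every other user gets declarations only */
#include "foo_trace.h"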
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a3f2ab283aeb..f0855a959a27 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -143,7 +143,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 	if (xdr->page_len) {
 		remaining = xdr->page_len;
 		offset = offset_in_page(xdr->page_base);
-		count = 0;
+		count = RPCRDMA_MIN_SEND_SGES;
 		while (remaining) {
 			remaining -= min_t(unsigned int,
 					   PAGE_SIZE - offset, remaining);
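The fix here is arithmetic: the SGE count must pre-charge the SGEs every Send always consumes (transport header and inline buffer) before adding one per touched page; starting from zero under-counts and can exhaust the send SGE budget. A self-contained sketch of the corrected computation (MIN_SGES = 3 and the 4096-byte page are illustrative, not the kernel's values):

#define MIN_SGES 3	/* e.g. header + head + tail, always consumed */

static unsigned int sges_needed(unsigned int page_len,
				unsigned int page_base)
{
	unsigned int count = MIN_SGES;	/* was 0 before the fix */
	unsigned int remaining = page_len;
	unsigned int offset = page_base % 4096;

	while (remaining) {		/* one SGE per touched page */
		unsigned int chunk = 4096 - offset;

		remaining -= (chunk < remaining) ? chunk : remaining;
		count++;
		offset = 0;
	}
	return count;
}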
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
 }
 
 static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
+xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
 {
-	*iptr++ = cpu_to_be32(mw->mw_handle);
-	*iptr++ = cpu_to_be32(mw->mw_length);
-	xdr_encode_hyper(iptr, mw->mw_offset);
+	*iptr++ = cpu_to_be32(mr->mr_handle);
+	*iptr++ = cpu_to_be32(mr->mr_length);
+	xdr_encode_hyper(iptr, mr->mr_offset);
 }
 
 static int
-encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
 {
 	__be32 *p;
 
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
 	if (unlikely(!p))
 		return -EMSGSIZE;
 
-	xdr_encode_rdma_segment(p, mw);
+	xdr_encode_rdma_segment(p, mr);
 	return 0;
 }
 
 static int
-encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
 		    u32 position)
 {
 	__be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
 
 	*p++ = xdr_one;			/* Item present */
 	*p++ = cpu_to_be32(position);
-	xdr_encode_rdma_segment(p, mw);
+	xdr_encode_rdma_segment(p, mr);
 	return 0;
 }
330 330
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
348{ 348{
349 struct xdr_stream *xdr = &req->rl_stream; 349 struct xdr_stream *xdr = &req->rl_stream;
350 struct rpcrdma_mr_seg *seg; 350 struct rpcrdma_mr_seg *seg;
351 struct rpcrdma_mw *mw; 351 struct rpcrdma_mr *mr;
352 unsigned int pos; 352 unsigned int pos;
353 int nsegs; 353 int nsegs;
354 354
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
 	do {
 		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						   false, &mw);
+						   false, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_push_mw(mw, &req->rl_registered);
+		rpcrdma_mr_push(mr, &req->rl_registered);
 
-		if (encode_read_segment(xdr, mw, pos) < 0)
+		if (encode_read_segment(xdr, mr, pos) < 0)
 			return -EMSGSIZE;
 
-		dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
-			rqst->rq_task->tk_pid, __func__, pos,
-			mw->mw_length, (unsigned long long)mw->mw_offset,
-			mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+		trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
 		r_xprt->rx_stats.read_chunk_count++;
-		nsegs -= mw->mw_nents;
+		nsegs -= mr->mr_nents;
 	} while (nsegs);
 
 	return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 {
 	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
-	struct rpcrdma_mw *mw;
+	struct rpcrdma_mr *mr;
 	int nsegs, nchunks;
 	__be32 *segcount;
 
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	nchunks = 0;
 	do {
 		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						   true, &mw);
+						   true, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_push_mw(mw, &req->rl_registered);
+		rpcrdma_mr_push(mr, &req->rl_registered);
 
-		if (encode_rdma_segment(xdr, mw) < 0)
+		if (encode_rdma_segment(xdr, mr) < 0)
 			return -EMSGSIZE;
 
-		dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
-			rqst->rq_task->tk_pid, __func__,
-			mw->mw_length, (unsigned long long)mw->mw_offset,
-			mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+		trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
 		r_xprt->rx_stats.write_chunk_count++;
-		r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
 		nchunks++;
-		nsegs -= mw->mw_nents;
+		nsegs -= mr->mr_nents;
 	} while (nsegs);
 
 	/* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
468{ 460{
469 struct xdr_stream *xdr = &req->rl_stream; 461 struct xdr_stream *xdr = &req->rl_stream;
470 struct rpcrdma_mr_seg *seg; 462 struct rpcrdma_mr_seg *seg;
471 struct rpcrdma_mw *mw; 463 struct rpcrdma_mr *mr;
472 int nsegs, nchunks; 464 int nsegs, nchunks;
473 __be32 *segcount; 465 __be32 *segcount;
474 466
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
487 nchunks = 0; 479 nchunks = 0;
488 do { 480 do {
489 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 481 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
490 true, &mw); 482 true, &mr);
491 if (IS_ERR(seg)) 483 if (IS_ERR(seg))
492 return PTR_ERR(seg); 484 return PTR_ERR(seg);
493 rpcrdma_push_mw(mw, &req->rl_registered); 485 rpcrdma_mr_push(mr, &req->rl_registered);
494 486
495 if (encode_rdma_segment(xdr, mw) < 0) 487 if (encode_rdma_segment(xdr, mr) < 0)
496 return -EMSGSIZE; 488 return -EMSGSIZE;
497 489
498 dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", 490 trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
499 rqst->rq_task->tk_pid, __func__,
500 mw->mw_length, (unsigned long long)mw->mw_offset,
501 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
502
503 r_xprt->rx_stats.reply_chunk_count++; 491 r_xprt->rx_stats.reply_chunk_count++;
504 r_xprt->rx_stats.total_rdma_request += seg->mr_len; 492 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
505 nchunks++; 493 nchunks++;
506 nsegs -= mw->mw_nents; 494 nsegs -= mr->mr_nents;
507 } while (nsegs); 495 } while (nsegs);
508 496
509 /* Update count of segments in the Reply chunk */ 497 /* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
524 struct ib_sge *sge; 512 struct ib_sge *sge;
525 unsigned int count; 513 unsigned int count;
526 514
527 dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
528 __func__, sc->sc_unmap_count, sc);
529
530 /* The first two SGEs contain the transport header and 515 /* The first two SGEs contain the transport header and
531 * the inline buffer. These are always left mapped so 516 * the inline buffer. These are always left mapped so
532 * they can be cheaply re-used. 517 * they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
754 __be32 *p; 739 __be32 *p;
755 int ret; 740 int ret;
756 741
757#if defined(CONFIG_SUNRPC_BACKCHANNEL)
758 if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
759 return rpcrdma_bc_marshal_reply(rqst);
760#endif
761
762 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 742 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
763 xdr_init_encode(xdr, &req->rl_hdrbuf, 743 xdr_init_encode(xdr, &req->rl_hdrbuf,
764 req->rl_rdmabuf->rg_base); 744 req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
821 rtype = rpcrdma_areadch; 801 rtype = rpcrdma_areadch;
822 } 802 }
823 803
804 /* If this is a retransmit, discard previously registered
805 * chunks. Very likely the connection has been replaced,
806 * so these registrations are invalid and unusable.
807 */
808 while (unlikely(!list_empty(&req->rl_registered))) {
809 struct rpcrdma_mr *mr;
810
811 mr = rpcrdma_mr_pop(&req->rl_registered);
812 rpcrdma_mr_defer_recovery(mr);
813 }
814
824 /* This implementation supports the following combinations 815 /* This implementation supports the following combinations
825 * of chunk lists in one RPC-over-RDMA Call message: 816 * of chunk lists in one RPC-over-RDMA Call message:
826 * 817 *
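
[Annotation] The added block drains rl_registered before re-marshaling: on a retransmit the connection has very likely been replaced, so each leftover MR is popped and handed to the deferred-recovery path rather than reused. A minimal userspace sketch of the pop-and-defer loop; mr_node and the two helpers are hypothetical stand-ins for the kernel's rpcrdma_mr, rpcrdma_mr_pop() and rpcrdma_mr_defer_recovery().

#include <stdio.h>
#include <stdlib.h>

struct mr_node {
        struct mr_node *next;
        unsigned int handle;
};

/* Detach and return the first node, or NULL if the list is empty. */
static struct mr_node *mr_pop(struct mr_node **list)
{
        struct mr_node *mr = *list;

        if (mr)
                *list = mr->next;
        return mr;
}

/* Stand-in for queueing the MR to a recovery worker. */
static void mr_defer_recovery(struct mr_node *mr)
{
        printf("recovering stale MR 0x%08x\n", mr->handle);
        free(mr);
}

int main(void)
{
        struct mr_node *registered = NULL;

        for (unsigned int i = 1; i <= 3; i++) {
                struct mr_node *mr = malloc(sizeof(*mr));

                mr->handle = i;
                mr->next = registered;
                registered = mr;
        }

        /* Retransmit path: discard every previously registered MR. */
        while (registered)
                mr_defer_recovery(mr_pop(&registered));
        return 0;
}
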
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
868 if (ret) 859 if (ret)
869 goto out_err; 860 goto out_err;
870 861
871 dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", 862 trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
872 rqst->rq_task->tk_pid, __func__,
873 transfertypes[rtype], transfertypes[wtype],
874 xdr_stream_pos(xdr));
875 863
876 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), 864 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
877 &rqst->rq_snd_buf, rtype); 865 &rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
926 curlen = rqst->rq_rcv_buf.head[0].iov_len; 914 curlen = rqst->rq_rcv_buf.head[0].iov_len;
927 if (curlen > copy_len) 915 if (curlen > copy_len)
928 curlen = copy_len; 916 curlen = copy_len;
929 dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", 917 trace_xprtrdma_fixup(rqst, copy_len, curlen);
930 __func__, srcp, copy_len, curlen);
931 srcp += curlen; 918 srcp += curlen;
932 copy_len -= curlen; 919 copy_len -= curlen;
933 920
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
947 if (curlen > pagelist_len) 934 if (curlen > pagelist_len)
948 curlen = pagelist_len; 935 curlen = pagelist_len;
949 936
950 dprintk("RPC: %s: page %d" 937 trace_xprtrdma_fixup_pg(rqst, i, srcp,
951 " srcp 0x%p len %d curlen %d\n", 938 copy_len, curlen);
952 __func__, i, srcp, copy_len, curlen);
953 destp = kmap_atomic(ppages[i]); 939 destp = kmap_atomic(ppages[i]);
954 memcpy(destp + page_base, srcp, curlen); 940 memcpy(destp + page_base, srcp, curlen);
955 flush_dcache_page(ppages[i]); 941 flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
984 return fixup_copy_count; 970 return fixup_copy_count;
985} 971}
986 972
987/* Caller must guarantee @rep remains stable during this call.
988 */
989static void
990rpcrdma_mark_remote_invalidation(struct list_head *mws,
991 struct rpcrdma_rep *rep)
992{
993 struct rpcrdma_mw *mw;
994
995 if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
996 return;
997
998 list_for_each_entry(mw, mws, mw_list)
999 if (mw->mw_handle == rep->rr_inv_rkey) {
1000 mw->mw_flags = RPCRDMA_MW_F_RI;
1001 break; /* only one invalidated MR per RPC */
1002 }
1003}
1004
1005/* By convention, backchannel calls arrive via rdma_msg type 973/* By convention, backchannel calls arrive via rdma_msg type
1006 * messages, and never populate the chunk lists. This makes 974 * messages, and never populate the chunk lists. This makes
1007 * the RPC/RDMA header small and fixed in size, so it is 975 * the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
1058 1026
1059static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) 1027static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1060{ 1028{
1029 u32 handle;
1030 u64 offset;
1061 __be32 *p; 1031 __be32 *p;
1062 1032
1063 p = xdr_inline_decode(xdr, 4 * sizeof(*p)); 1033 p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1064 if (unlikely(!p)) 1034 if (unlikely(!p))
1065 return -EIO; 1035 return -EIO;
1066 1036
1067 ifdebug(FACILITY) { 1037 handle = be32_to_cpup(p++);
1068 u64 offset; 1038 *length = be32_to_cpup(p++);
1069 u32 handle; 1039 xdr_decode_hyper(p, &offset);
1070
1071 handle = be32_to_cpup(p++);
1072 *length = be32_to_cpup(p++);
1073 xdr_decode_hyper(p, &offset);
1074 dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
1075 __func__, *length, (unsigned long long)offset,
1076 handle);
1077 } else {
1078 *length = be32_to_cpup(p + 1);
1079 }
1080 1040
1041 trace_xprtrdma_decode_seg(handle, *length, offset);
1081 return 0; 1042 return 0;
1082} 1043}
1083 1044
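
[Annotation] With the debug-only branch gone, decode_rdma_segment always parses the full segment so the handle and offset can feed the new trace point. On the wire a segment is four XDR words: a 32-bit handle, a 32-bit length, and a 64-bit offset sent high word first. A userspace sketch of the same decode, using ntohl in place of the kernel's be32_to_cpup/xdr_decode_hyper:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static void decode_segment(const uint32_t *p, uint32_t *handle,
                           uint32_t *length, uint64_t *offset)
{
        *handle = ntohl(p[0]);
        *length = ntohl(p[1]);
        /* 64-bit hyper: high 32 bits first, then low 32 bits */
        *offset = ((uint64_t)ntohl(p[2]) << 32) | ntohl(p[3]);
}

int main(void)
{
        /* handle 0x1234, length 8192, offset 0x0000000100000000 */
        uint32_t wire[4] = { htonl(0x1234), htonl(8192), htonl(1), htonl(0) };
        uint32_t handle, length;
        uint64_t offset;

        decode_segment(wire, &handle, &length, &offset);
        printf("segment %u@0x%016llx:0x%08x\n",
               length, (unsigned long long)offset, handle);
        return 0;
}
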
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
1098 *length += seglength; 1059 *length += seglength;
1099 } 1060 }
1100 1061
1101 dprintk("RPC: %s: segcount=%u, %u bytes\n",
1102 __func__, be32_to_cpup(p), *length);
1103 return 0; 1062 return 0;
1104} 1063}
1105 1064
@@ -1296,8 +1255,7 @@ out:
1296 * being marshaled. 1255 * being marshaled.
1297 */ 1256 */
1298out_badheader: 1257out_badheader:
1299 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", 1258 trace_xprtrdma_reply_hdr(rep);
1300 rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
1301 r_xprt->rx_stats.bad_reply_count++; 1259 r_xprt->rx_stats.bad_reply_count++;
1302 status = -EIO; 1260 status = -EIO;
1303 goto out; 1261 goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
1339 struct rpcrdma_rep *rep = 1297 struct rpcrdma_rep *rep =
1340 container_of(work, struct rpcrdma_rep, rr_work); 1298 container_of(work, struct rpcrdma_rep, rr_work);
1341 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); 1299 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
1300 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1342 1301
1343 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); 1302 trace_xprtrdma_defer_cmp(rep);
1344 rpcrdma_release_rqst(rep->rr_rxprt, req); 1303 if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
1304 r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
1305 rpcrdma_release_rqst(r_xprt, req);
1345 rpcrdma_complete_rqst(rep); 1306 rpcrdma_complete_rqst(rep);
1346} 1307}
1347 1308
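
[Annotation] The open-coded rpcrdma_mark_remote_invalidation() removed earlier moves behind the per-registration-mode ro_reminv method, gated here on IB_WC_WITH_INVALIDATE. Its core is a first-match walk of the registered MRs, since at most one MR per RPC is remotely invalidated. A self-contained sketch of that walk; struct mr is illustrative, not the kernel's rpcrdma_mr.

#include <assert.h>
#include <stddef.h>

struct mr {
        struct mr *next;
        unsigned int handle;
        int remotely_invalidated;
};

/* Mark the one MR whose rkey the peer invalidated, then stop. */
static void mark_remote_invalidation(struct mr *list, unsigned int inv_rkey)
{
        for (struct mr *mr = list; mr; mr = mr->next) {
                if (mr->handle == inv_rkey) {
                        mr->remotely_invalidated = 1;
                        break;  /* only one invalidated MR per RPC */
                }
        }
}

int main(void)
{
        struct mr c = { NULL, 0x30, 0 };
        struct mr b = { &c, 0x20, 0 };
        struct mr a = { &b, 0x10, 0 };

        mark_remote_invalidation(&a, 0x20);
        assert(!a.remotely_invalidated && b.remotely_invalidated &&
               !c.remotely_invalidated);
        return 0;
}
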
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1360 u32 credits; 1321 u32 credits;
1361 __be32 *p; 1322 __be32 *p;
1362 1323
1363 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
1364
1365 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1324 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1366 goto out_badstatus; 1325 goto out_badstatus;
1367 1326
@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1405 rep->rr_rqst = rqst; 1364 rep->rr_rqst = rqst;
1406 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 1365 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
1407 1366
1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1410 1368
1411 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); 1369 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
1412 return; 1370 return;
@@ -1420,8 +1378,7 @@ out_badstatus:
1420 return; 1378 return;
1421 1379
1422out_badversion: 1380out_badversion:
1423 dprintk("RPC: %s: invalid version %d\n", 1381 trace_xprtrdma_reply_vers(rep);
1424 __func__, be32_to_cpu(rep->rr_vers));
1425 goto repost; 1382 goto repost;
1426 1383
1427/* The RPC transaction has already been terminated, or the header 1384/* The RPC transaction has already been terminated, or the header
@@ -1429,12 +1386,11 @@ out_badversion:
1429 */ 1386 */
1430out_norqst: 1387out_norqst:
1431 spin_unlock(&xprt->recv_lock); 1388 spin_unlock(&xprt->recv_lock);
1432 dprintk("RPC: %s: no match for incoming xid 0x%08x\n", 1389 trace_xprtrdma_reply_rqst(rep);
1433 __func__, be32_to_cpu(rep->rr_xid));
1434 goto repost; 1390 goto repost;
1435 1391
1436out_shortreply: 1392out_shortreply:
1437 dprintk("RPC: %s: short/invalid reply\n", __func__); 1393 trace_xprtrdma_reply_short(rep);
1438 1394
1439/* If no pending RPC transaction was matched, post a replacement 1395/* If no pending RPC transaction was matched, post a replacement
1440 * receive buffer before returning. 1396 * receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af7893501e40..a73632ca9048 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -95,7 +95,6 @@ out_shortreply:
95out_notfound: 95out_notfound:
96 dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", 96 dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
97 xprt, be32_to_cpu(xid)); 97 xprt, be32_to_cpu(xid));
98
99 goto out_unlock; 98 goto out_unlock;
100} 99}
101 100
@@ -129,10 +128,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
129 if (ret < 0) 128 if (ret < 0)
130 goto out_err; 129 goto out_err;
131 130
132 ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
133 if (ret)
134 goto out_err;
135
136 /* Bump page refcnt so Send completion doesn't release 131 /* Bump page refcnt so Send completion doesn't release
137 * the rq_buffer before all retransmits are complete. 132 * the rq_buffer before all retransmits are complete.
138 */ 133 */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ad4bd62eebf1..19e9c6b33042 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -400,10 +400,6 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
400 struct page *page; 400 struct page *page;
401 int ret; 401 int ret;
402 402
403 ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
404 if (ret)
405 return;
406
407 page = alloc_page(GFP_KERNEL); 403 page = alloc_page(GFP_KERNEL);
408 if (!page) 404 if (!page)
409 return; 405 return;
@@ -554,8 +550,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
554 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, 550 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
555 &rqstp->rq_arg); 551 &rqstp->rq_arg);
556 svc_rdma_put_context(ctxt, 0); 552 svc_rdma_put_context(ctxt, 0);
557 if (ret)
558 goto repost;
559 return ret; 553 return ret;
560 } 554 }
561 555
@@ -590,6 +584,5 @@ out_postfail:
590 584
591out_drop: 585out_drop:
592 svc_rdma_put_context(ctxt, 1); 586 svc_rdma_put_context(ctxt, 1);
593repost: 587 return 0;
594 return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL);
595} 588}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 9bd04549a1ad..12b9a7e0b6d2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -727,12 +727,16 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
727 head->arg.head[0].iov_len - info->ri_position; 727 head->arg.head[0].iov_len - info->ri_position;
728 head->arg.head[0].iov_len = info->ri_position; 728 head->arg.head[0].iov_len = info->ri_position;
729 729
730 /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7). 730 /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
731 * 731 *
732 * NFSv2/3 write decoders need the length of the tail to 732 * If the client already rounded up the chunk length, the
733 * contain the size of the roundup padding. 733 * length does not change. Otherwise, the length of the page
734 * list is increased to include XDR round-up.
735 *
736 * Currently these chunks always start at page offset 0,
737 * thus the rounded-up length never crosses a page boundary.
734 */ 738 */
735 head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3); 739 info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2;
736 740
737 head->arg.page_len = info->ri_chunklen; 741 head->arg.page_len = info->ri_chunklen;
738 head->arg.len += info->ri_chunklen; 742 head->arg.len += info->ri_chunklen;
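
[Annotation] The replacement comment and code rely on XDR's quad alignment: XDR_QUADLEN(len) is the number of 4-byte words needed to hold len bytes, so XDR_QUADLEN(len) << 2 rounds len up to the next multiple of four, and a length that is already aligned is unchanged. A two-assert userspace check of that arithmetic, using the same macro definition as the kernel's XDR_QUADLEN:

#include <assert.h>

#define XDR_QUADLEN(l)  (((l) + 3) >> 2)  /* bytes -> 4-byte words */

int main(void)
{
        assert((XDR_QUADLEN(5) << 2) == 8);   /* 5 bytes round up to 8 */
        assert((XDR_QUADLEN(8) << 2) == 8);   /* aligned length unchanged */
        assert((XDR_QUADLEN(0) << 2) == 0);
        return 0;
}
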
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7c3a211e0e9a..649441d5087d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -674,9 +674,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
674 svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); 674 svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
675 } 675 }
676 676
677 ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
678 if (ret)
679 goto err1;
680 ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp, 677 ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
681 wr_lst, rp_ch); 678 wr_lst, rp_ch);
682 if (ret < 0) 679 if (ret < 0)
@@ -687,9 +684,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
687 if (ret != -E2BIG && ret != -EINVAL) 684 if (ret != -E2BIG && ret != -EINVAL)
688 goto err1; 685 goto err1;
689 686
690 ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
691 if (ret)
692 goto err1;
693 ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp); 687 ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp);
694 if (ret < 0) 688 if (ret < 0)
695 goto err0; 689 goto err0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 46ec069150d5..9ad12a215b51 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -58,6 +58,7 @@
58 58
59#define RPCDBG_FACILITY RPCDBG_SVCXPRT 59#define RPCDBG_FACILITY RPCDBG_SVCXPRT
60 60
61static int svc_rdma_post_recv(struct svcxprt_rdma *xprt);
61static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); 62static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
62static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 63static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
63 struct net *net, 64 struct net *net,
@@ -320,6 +321,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
320 list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q); 321 list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
321 spin_unlock(&xprt->sc_rq_dto_lock); 322 spin_unlock(&xprt->sc_rq_dto_lock);
322 323
324 svc_rdma_post_recv(xprt);
325
323 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 326 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
324 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) 327 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
325 goto out; 328 goto out;
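
[Annotation] This hunk is the other half of the svc_rdma_repost_recv() removals above: instead of reposting receive buffers from the sendto/recvfrom paths, each receive completion immediately posts one replacement, holding the receive queue at the depth established in svc_rdma_accept(). A toy invariant check of that one-in, one-out discipline; the counter stands in for work requests actually posted to a QP.

#include <assert.h>

static int posted;                      /* receives outstanding on the QP */

static void post_recv(void)
{
        posted++;
}

/* Receive completion: one buffer consumed, one replacement posted. */
static void wc_receive(void)
{
        posted--;
        post_recv();
}

int main(void)
{
        for (int i = 0; i < 64; i++)    /* initial ring, as in accept */
                post_recv();
        for (int i = 0; i < 1000; i++)  /* arbitrary amount of traffic */
                wc_receive();
        assert(posted == 64);           /* depth held constant */
        return 0;
}
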
@@ -404,7 +407,8 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
404 return cma_xprt; 407 return cma_xprt;
405} 408}
406 409
407int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) 410static int
411svc_rdma_post_recv(struct svcxprt_rdma *xprt)
408{ 412{
409 struct ib_recv_wr recv_wr, *bad_recv_wr; 413 struct ib_recv_wr recv_wr, *bad_recv_wr;
410 struct svc_rdma_op_ctxt *ctxt; 414 struct svc_rdma_op_ctxt *ctxt;
@@ -423,7 +427,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
423 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 427 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
424 goto err_put_ctxt; 428 goto err_put_ctxt;
425 } 429 }
426 page = alloc_page(flags); 430 page = alloc_page(GFP_KERNEL);
427 if (!page) 431 if (!page)
428 goto err_put_ctxt; 432 goto err_put_ctxt;
429 ctxt->pages[sge_no] = page; 433 ctxt->pages[sge_no] = page;
@@ -459,21 +463,6 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
459 return -ENOMEM; 463 return -ENOMEM;
460} 464}
461 465
462int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
463{
464 int ret = 0;
465
466 ret = svc_rdma_post_recv(xprt, flags);
467 if (ret) {
468 pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
469 ret);
470 pr_err("svcrdma: closing transport %p.\n", xprt);
471 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
472 ret = -ENOTCONN;
473 }
474 return ret;
475}
476
477static void 466static void
478svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, 467svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
479 struct rdma_conn_param *param) 468 struct rdma_conn_param *param)
@@ -833,7 +822,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
833 822
834 /* Post receive buffers */ 823 /* Post receive buffers */
835 for (i = 0; i < newxprt->sc_max_requests; i++) { 824 for (i = 0; i < newxprt->sc_max_requests; i++) {
836 ret = svc_rdma_post_recv(newxprt, GFP_KERNEL); 825 ret = svc_rdma_post_recv(newxprt);
837 if (ret) { 826 if (ret) {
838 dprintk("svcrdma: failure posting receive buffers\n"); 827 dprintk("svcrdma: failure posting receive buffers\n");
839 goto errout; 828 goto errout;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 6ee1ad8978f3..4b1ecfe979cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,8 +67,7 @@
67static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; 67static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
68unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 68unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
69static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 69static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
70static unsigned int xprt_rdma_inline_write_padding; 70unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
71unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
72int xprt_rdma_pad_optimize; 71int xprt_rdma_pad_optimize;
73 72
74#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 73#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -81,6 +80,7 @@ static unsigned int zero;
81static unsigned int max_padding = PAGE_SIZE; 80static unsigned int max_padding = PAGE_SIZE;
82static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; 81static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
83static unsigned int max_memreg = RPCRDMA_LAST - 1; 82static unsigned int max_memreg = RPCRDMA_LAST - 1;
83static unsigned int dummy;
84 84
85static struct ctl_table_header *sunrpc_table_header; 85static struct ctl_table_header *sunrpc_table_header;
86 86
@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
114 }, 114 },
115 { 115 {
116 .procname = "rdma_inline_write_padding", 116 .procname = "rdma_inline_write_padding",
117 .data = &xprt_rdma_inline_write_padding, 117 .data = &dummy,
118 .maxlen = sizeof(unsigned int), 118 .maxlen = sizeof(unsigned int),
119 .mode = 0644, 119 .mode = 0644,
120 .proc_handler = proc_dointvec_minmax, 120 .proc_handler = proc_dointvec_minmax,
@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
259 259
260 xprt_clear_connected(xprt); 260 xprt_clear_connected(xprt);
261 261
262 dprintk("RPC: %s: %sconnect\n", __func__,
263 r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
264 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 262 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
265 if (rc) 263 if (rc)
266 xprt_wake_pending_tasks(xprt, rc); 264 xprt_wake_pending_tasks(xprt, rc);
267 265
268 dprintk("RPC: %s: exit\n", __func__);
269 xprt_clear_connecting(xprt); 266 xprt_clear_connecting(xprt);
270} 267}
271 268
@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
275 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, 272 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
276 rx_xprt); 273 rx_xprt);
277 274
278 pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); 275 trace_xprtrdma_inject_dsc(r_xprt);
279 rdma_disconnect(r_xprt->rx_ia.ri_id); 276 rdma_disconnect(r_xprt->rx_ia.ri_id);
280} 277}
281 278
@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
295{ 292{
296 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 293 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
297 294
298 dprintk("RPC: %s: called\n", __func__); 295 trace_xprtrdma_destroy(r_xprt);
299 296
300 cancel_delayed_work_sync(&r_xprt->rx_connect_worker); 297 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
301 298
@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
306 rpcrdma_ia_close(&r_xprt->rx_ia); 303 rpcrdma_ia_close(&r_xprt->rx_ia);
307 304
308 xprt_rdma_free_addresses(xprt); 305 xprt_rdma_free_addresses(xprt);
309
310 xprt_free(xprt); 306 xprt_free(xprt);
311 307
312 dprintk("RPC: %s: returning\n", __func__);
313
314 module_put(THIS_MODULE); 308 module_put(THIS_MODULE);
315} 309}
316 310
@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
361 /* 355 /*
362 * Set up RDMA-specific connect data. 356 * Set up RDMA-specific connect data.
363 */ 357 */
364 358 sap = args->dstaddr;
365 sap = (struct sockaddr *)&cdata.addr;
366 memcpy(sap, args->dstaddr, args->addrlen);
367 359
368 /* Ensure xprt->addr holds valid server TCP (not RDMA) 360 /* Ensure xprt->addr holds valid server TCP (not RDMA)
369 * address, for any side protocols which peek at it */ 361 * address, for any side protocols which peek at it */
@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
373 365
374 if (rpc_get_port(sap)) 366 if (rpc_get_port(sap))
375 xprt_set_bound(xprt); 367 xprt_set_bound(xprt);
368 xprt_rdma_format_addresses(xprt, sap);
376 369
377 cdata.max_requests = xprt->max_reqs; 370 cdata.max_requests = xprt->max_reqs;
378 371
@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
387 if (cdata.inline_rsize > cdata.rsize) 380 if (cdata.inline_rsize > cdata.rsize)
388 cdata.inline_rsize = cdata.rsize; 381 cdata.inline_rsize = cdata.rsize;
389 382
390 cdata.padding = xprt_rdma_inline_write_padding;
391
392 /* 383 /*
393 * Create new transport instance, which includes initialized 384 * Create new transport instance, which includes initialized
394 * o ia 385 * o ia
@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
398 389
399 new_xprt = rpcx_to_rdmax(xprt); 390 new_xprt = rpcx_to_rdmax(xprt);
400 391
401 rc = rpcrdma_ia_open(new_xprt, sap); 392 rc = rpcrdma_ia_open(new_xprt);
402 if (rc) 393 if (rc)
403 goto out1; 394 goto out1;
404 395
@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
407 */ 398 */
408 new_xprt->rx_data = cdata; 399 new_xprt->rx_data = cdata;
409 new_ep = &new_xprt->rx_ep; 400 new_ep = &new_xprt->rx_ep;
410 new_ep->rep_remote_addr = cdata.addr;
411 401
412 rc = rpcrdma_ep_create(&new_xprt->rx_ep, 402 rc = rpcrdma_ep_create(&new_xprt->rx_ep,
413 &new_xprt->rx_ia, &new_xprt->rx_data); 403 &new_xprt->rx_ia, &new_xprt->rx_data);
414 if (rc) 404 if (rc)
415 goto out2; 405 goto out2;
416 406
417 /*
418 * Allocate pre-registered send and receive buffers for headers and
419 * any inline data. Also specify any padding which will be provided
420 * from a preregistered zero buffer.
421 */
422 rc = rpcrdma_buffer_create(new_xprt); 407 rc = rpcrdma_buffer_create(new_xprt);
423 if (rc) 408 if (rc)
424 goto out3; 409 goto out3;
425 410
426 /*
427 * Register a callback for connection events. This is necessary because
428 * connection loss notification is async. We also catch connection loss
429 * when reaping receives.
430 */
431 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, 411 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
432 xprt_rdma_connect_worker); 412 xprt_rdma_connect_worker);
433 413
434 xprt_rdma_format_addresses(xprt, sap);
435 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); 414 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
436 if (xprt->max_payload == 0) 415 if (xprt->max_payload == 0)
437 goto out4; 416 goto out4;
@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
445 dprintk("RPC: %s: %s:%s\n", __func__, 424 dprintk("RPC: %s: %s:%s\n", __func__,
446 xprt->address_strings[RPC_DISPLAY_ADDR], 425 xprt->address_strings[RPC_DISPLAY_ADDR],
447 xprt->address_strings[RPC_DISPLAY_PORT]); 426 xprt->address_strings[RPC_DISPLAY_PORT]);
427 trace_xprtrdma_create(new_xprt);
448 return xprt; 428 return xprt;
449 429
450out4: 430out4:
451 xprt_rdma_free_addresses(xprt); 431 rpcrdma_buffer_destroy(&new_xprt->rx_buf);
452 rc = -EINVAL; 432 rc = -ENODEV;
453out3: 433out3:
454 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); 434 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
455out2: 435out2:
456 rpcrdma_ia_close(&new_xprt->rx_ia); 436 rpcrdma_ia_close(&new_xprt->rx_ia);
457out1: 437out1:
438 trace_xprtrdma_destroy(new_xprt);
439 xprt_rdma_free_addresses(xprt);
458 xprt_free(xprt); 440 xprt_free(xprt);
459 return ERR_PTR(rc); 441 return ERR_PTR(rc);
460} 442}
@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
488 rpcrdma_ep_disconnect(ep, ia); 470 rpcrdma_ep_disconnect(ep, ia);
489} 471}
490 472
473/**
474 * xprt_rdma_set_port - update server port with rpcbind result
475 * @xprt: controlling RPC transport
476 * @port: new port value
477 *
478 * Transport connect status is unchanged.
479 */
491static void 480static void
492xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) 481xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
493{ 482{
494 struct sockaddr_in *sap; 483 struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
484 char buf[8];
495 485
496 sap = (struct sockaddr_in *)&xprt->addr; 486 dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
497 sap->sin_port = htons(port); 487 __func__, xprt,
498 sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; 488 xprt->address_strings[RPC_DISPLAY_ADDR],
499 sap->sin_port = htons(port); 489 xprt->address_strings[RPC_DISPLAY_PORT],
500 dprintk("RPC: %s: %u\n", __func__, port); 490 port);
491
492 rpc_set_port(sap, port);
493
494 kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
495 snprintf(buf, sizeof(buf), "%u", port);
496 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
497
498 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
499 snprintf(buf, sizeof(buf), "%4hx", port);
500 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
501} 501}
502 502
503/** 503/**
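
[Annotation] The old body cast xprt->addr to sockaddr_in unconditionally, which writes the wrong bytes for an AF_INET6 address; the replacement goes through rpc_set_port(), which dispatches on the address family, and then regenerates the cached port strings. A userspace sketch of that dispatch; set_port here is a local stand-in, not the kernel helper.

#include <assert.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static void set_port(struct sockaddr *sap, unsigned short port)
{
        switch (sap->sa_family) {
        case AF_INET:
                ((struct sockaddr_in *)sap)->sin_port = htons(port);
                break;
        case AF_INET6:
                ((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
                break;
        }
}

int main(void)
{
        struct sockaddr_in6 sin6;

        memset(&sin6, 0, sizeof(sin6));
        sin6.sin6_family = AF_INET6;
        set_port((struct sockaddr *)&sin6, 2049);
        assert(ntohs(sin6.sin6_port) == 2049);
        return 0;
}
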
@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
516static void 516static void
517xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) 517xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
518{ 518{
519 dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
520
521 xprt_force_disconnect(xprt); 519 xprt_force_disconnect(xprt);
522} 520}
523 521
@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
640 638
641 req = rpcrdma_buffer_get(&r_xprt->rx_buf); 639 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
642 if (req == NULL) 640 if (req == NULL)
643 return -ENOMEM; 641 goto out_get;
644 642
645 flags = RPCRDMA_DEF_GFP; 643 flags = RPCRDMA_DEF_GFP;
646 if (RPC_IS_SWAPPER(task)) 644 if (RPC_IS_SWAPPER(task))
@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
653 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 651 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
654 goto out_fail; 652 goto out_fail;
655 653
656 dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
657 task->tk_pid, __func__, rqst->rq_callsize,
658 rqst->rq_rcvsize, req);
659
660 req->rl_cpu = smp_processor_id(); 654 req->rl_cpu = smp_processor_id();
661 req->rl_connect_cookie = 0; /* our reserved value */ 655 req->rl_connect_cookie = 0; /* our reserved value */
662 rpcrdma_set_xprtdata(rqst, req); 656 rpcrdma_set_xprtdata(rqst, req);
663 rqst->rq_buffer = req->rl_sendbuf->rg_base; 657 rqst->rq_buffer = req->rl_sendbuf->rg_base;
664 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 658 rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
659 trace_xprtrdma_allocate(task, req);
665 return 0; 660 return 0;
666 661
667out_fail: 662out_fail:
668 rpcrdma_buffer_put(req); 663 rpcrdma_buffer_put(req);
664out_get:
665 trace_xprtrdma_allocate(task, NULL);
669 return -ENOMEM; 666 return -ENOMEM;
670} 667}
671 668
@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
682 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 679 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
683 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 680 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
684 681
685 if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
686 return;
687
688 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
689
690 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 682 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
691 rpcrdma_release_rqst(r_xprt, req); 683 rpcrdma_release_rqst(r_xprt, req);
684 trace_xprtrdma_rpc_done(task, req);
692 rpcrdma_buffer_put(req); 685 rpcrdma_buffer_put(req);
693} 686}
694 687
@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
698 * 691 *
699 * Caller holds the transport's write lock. 692 * Caller holds the transport's write lock.
700 * 693 *
701 * Return values: 694 * Returns:
702 * 0: The request has been sent 695 * %0 if the RPC message has been sent
703 * ENOTCONN: Caller needs to invoke connect logic then call again 696 * %-ENOTCONN if the caller should reconnect and call again
704 * ENOBUFS: Call again later to send the request 697 * %-ENOBUFS if the caller should call again later
705 * EIO: A permanent error occurred. The request was not sent, 698 * %-EIO if a permanent error occurred and the request was not
706 * and don't try it again 699 * sent. Do not try to send this message again.
707 *
708 * send_request invokes the meat of RPC RDMA. It must do the following:
709 *
710 * 1. Marshal the RPC request into an RPC RDMA request, which means
711 * putting a header in front of data, and creating IOVs for RDMA
712 * from those in the request.
713 * 2. In marshaling, detect opportunities for RDMA, and use them.
714 * 3. Post a recv message to set up asynch completion, then send
715 * the request (rpcrdma_ep_post).
716 * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
717 */ 700 */
718static int 701static int
719xprt_rdma_send_request(struct rpc_task *task) 702xprt_rdma_send_request(struct rpc_task *task)
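
[Annotation] The tightened kerneldoc states the ->send_request contract in four cases. A hedged illustration of how a caller would act on each documented value; the real dispatch lives in the generic RPC transport code, which is not part of this patch.

#include <errno.h>
#include <stdio.h>

static void handle_send_result(int rc)
{
        switch (rc) {
        case 0:
                puts("sent; wait for the reply");
                break;
        case -ENOTCONN:
                puts("reconnect, then call send again");
                break;
        case -ENOBUFS:
                puts("call again later");
                break;
        case -EIO:
                puts("permanent error; do not resend");
                break;
        }
}

int main(void)
{
        handle_send_result(-ENOBUFS);
        return 0;
}
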
@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
724 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 707 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
725 int rc = 0; 708 int rc = 0;
726 709
710#if defined(CONFIG_SUNRPC_BACKCHANNEL)
711 if (unlikely(!rqst->rq_buffer))
712 return xprt_rdma_bc_send_reply(rqst);
713#endif /* CONFIG_SUNRPC_BACKCHANNEL */
714
727 if (!xprt_connected(xprt)) 715 if (!xprt_connected(xprt))
728 goto drop_connection; 716 goto drop_connection;
729 717
730 /* On retransmit, remove any previously registered chunks */
731 if (unlikely(!list_empty(&req->rl_registered)))
732 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
733 &req->rl_registered);
734
735 rc = rpcrdma_marshal_req(r_xprt, rqst); 718 rc = rpcrdma_marshal_req(r_xprt, rqst);
736 if (rc < 0) 719 if (rc < 0)
737 goto failed_marshal; 720 goto failed_marshal;
@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
744 goto drop_connection; 727 goto drop_connection;
745 req->rl_connect_cookie = xprt->connect_cookie; 728 req->rl_connect_cookie = xprt->connect_cookie;
746 729
747 set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 730 __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
748 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 731 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
749 goto drop_connection; 732 goto drop_connection;
750 733
@@ -904,8 +887,7 @@ int xprt_rdma_init(void)
904 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", 887 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
905 xprt_rdma_slot_table_entries, 888 xprt_rdma_slot_table_entries,
906 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); 889 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
907 dprintk("\tPadding %d\n\tMemreg %d\n", 890 dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
908 xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
909 891
910#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 892#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
911 if (!sunrpc_table_header) 893 if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8607c029c0dd..e6f84a6434a0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
71/* 71/*
72 * internal functions 72 * internal functions
73 */ 73 */
74static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); 74static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
75static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); 75static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
77 77
78struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 78struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -108,7 +108,10 @@ static void
108rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) 108rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
109{ 109{
110 struct rpcrdma_ep *ep = context; 110 struct rpcrdma_ep *ep = context;
111 struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
112 rx_ep);
111 113
114 trace_xprtrdma_qp_error(r_xprt, event);
112 pr_err("rpcrdma: %s on device %s ep %p\n", 115 pr_err("rpcrdma: %s on device %s ep %p\n",
113 ib_event_msg(event->event), event->device->name, context); 116 ib_event_msg(event->event), event->device->name, context);
114 117
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
133 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 136 container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
134 137
135 /* WARNING: Only wr_cqe and status are reliable at this point */ 138 /* WARNING: Only wr_cqe and status are reliable at this point */
139 trace_xprtrdma_wc_send(sc, wc);
136 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) 140 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
137 pr_err("rpcrdma: Send: %s (%u/0x%x)\n", 141 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
138 ib_wc_status_msg(wc->status), 142 ib_wc_status_msg(wc->status),
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
155 rr_cqe); 159 rr_cqe);
156 160
157 /* WARNING: Only wr_id and status are reliable at this point */ 161 /* WARNING: Only wr_id and status are reliable at this point */
162 trace_xprtrdma_wc_receive(rep, wc);
158 if (wc->status != IB_WC_SUCCESS) 163 if (wc->status != IB_WC_SUCCESS)
159 goto out_fail; 164 goto out_fail;
160 165
161 /* status == SUCCESS means all fields in wc are trustworthy */ 166 /* status == SUCCESS means all fields in wc are trustworthy */
162 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
163 __func__, rep, wc->byte_len);
164
165 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 167 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
166 rep->rr_wc_flags = wc->wc_flags; 168 rep->rr_wc_flags = wc->wc_flags;
167 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 169 rep->rr_inv_rkey = wc->ex.invalidate_rkey;
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
192 unsigned int rsize, wsize; 194 unsigned int rsize, wsize;
193 195
194 /* Default settings for RPC-over-RDMA Version One */ 196 /* Default settings for RPC-over-RDMA Version One */
195 r_xprt->rx_ia.ri_reminv_expected = false;
196 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; 197 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
197 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 198 rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
198 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 199 wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
200 if (pmsg && 201 if (pmsg &&
201 pmsg->cp_magic == rpcrdma_cmp_magic && 202 pmsg->cp_magic == rpcrdma_cmp_magic &&
202 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 203 pmsg->cp_version == RPCRDMA_CMP_VERSION) {
203 r_xprt->rx_ia.ri_reminv_expected = true;
204 r_xprt->rx_ia.ri_implicit_roundup = true; 204 r_xprt->rx_ia.ri_implicit_roundup = true;
205 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 205 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
206 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 206 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
221 struct rpcrdma_xprt *xprt = id->context; 221 struct rpcrdma_xprt *xprt = id->context;
222 struct rpcrdma_ia *ia = &xprt->rx_ia; 222 struct rpcrdma_ia *ia = &xprt->rx_ia;
223 struct rpcrdma_ep *ep = &xprt->rx_ep; 223 struct rpcrdma_ep *ep = &xprt->rx_ep;
224#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
225 struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
226#endif
227 int connstate = 0; 224 int connstate = 0;
228 225
226 trace_xprtrdma_conn_upcall(xprt, event);
229 switch (event->event) { 227 switch (event->event) {
230 case RDMA_CM_EVENT_ADDR_RESOLVED: 228 case RDMA_CM_EVENT_ADDR_RESOLVED:
231 case RDMA_CM_EVENT_ROUTE_RESOLVED: 229 case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
234 break; 232 break;
235 case RDMA_CM_EVENT_ADDR_ERROR: 233 case RDMA_CM_EVENT_ADDR_ERROR:
236 ia->ri_async_rc = -EHOSTUNREACH; 234 ia->ri_async_rc = -EHOSTUNREACH;
237 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
238 __func__, ep);
239 complete(&ia->ri_done); 235 complete(&ia->ri_done);
240 break; 236 break;
241 case RDMA_CM_EVENT_ROUTE_ERROR: 237 case RDMA_CM_EVENT_ROUTE_ERROR:
242 ia->ri_async_rc = -ENETUNREACH; 238 ia->ri_async_rc = -ENETUNREACH;
243 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
244 __func__, ep);
245 complete(&ia->ri_done); 239 complete(&ia->ri_done);
246 break; 240 break;
247 case RDMA_CM_EVENT_DEVICE_REMOVAL: 241 case RDMA_CM_EVENT_DEVICE_REMOVAL:
248#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 242#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
249 pr_info("rpcrdma: removing device %s for %pIS:%u\n", 243 pr_info("rpcrdma: removing device %s for %s:%s\n",
250 ia->ri_device->name, 244 ia->ri_device->name,
251 sap, rpc_get_port(sap)); 245 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
252#endif 246#endif
253 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); 247 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
254 ep->rep_connected = -ENODEV; 248 ep->rep_connected = -ENODEV;
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
271 connstate = -ENETDOWN; 265 connstate = -ENETDOWN;
272 goto connected; 266 goto connected;
273 case RDMA_CM_EVENT_REJECTED: 267 case RDMA_CM_EVENT_REJECTED:
274 dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", 268 dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
275 sap, rpc_get_port(sap), 269 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
276 rdma_reject_msg(id, event->status)); 270 rdma_reject_msg(id, event->status));
277 connstate = -ECONNREFUSED; 271 connstate = -ECONNREFUSED;
278 if (event->status == IB_CM_REJ_STALE_CONN) 272 if (event->status == IB_CM_REJ_STALE_CONN)
@@ -287,8 +281,9 @@ connected:
287 wake_up_all(&ep->rep_connect_wait); 281 wake_up_all(&ep->rep_connect_wait);
288 /*FALLTHROUGH*/ 282 /*FALLTHROUGH*/
289 default: 283 default:
290 dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", 284 dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
291 __func__, sap, rpc_get_port(sap), 285 __func__,
286 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
292 ia->ri_device->name, ia->ri_ops->ro_displayname, 287 ia->ri_device->name, ia->ri_ops->ro_displayname,
293 ep, rdma_event_msg(event->event)); 288 ep, rdma_event_msg(event->event));
294 break; 289 break;
@@ -298,13 +293,14 @@ connected:
298} 293}
299 294
300static struct rdma_cm_id * 295static struct rdma_cm_id *
301rpcrdma_create_id(struct rpcrdma_xprt *xprt, 296rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
302 struct rpcrdma_ia *ia, struct sockaddr *addr)
303{ 297{
304 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 298 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
305 struct rdma_cm_id *id; 299 struct rdma_cm_id *id;
306 int rc; 300 int rc;
307 301
302 trace_xprtrdma_conn_start(xprt);
303
308 init_completion(&ia->ri_done); 304 init_completion(&ia->ri_done);
309 init_completion(&ia->ri_remove_done); 305 init_completion(&ia->ri_remove_done);
310 306
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
318 } 314 }
319 315
320 ia->ri_async_rc = -ETIMEDOUT; 316 ia->ri_async_rc = -ETIMEDOUT;
321 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); 317 rc = rdma_resolve_addr(id, NULL,
318 (struct sockaddr *)&xprt->rx_xprt.addr,
319 RDMA_RESOLVE_TIMEOUT);
322 if (rc) { 320 if (rc) {
323 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", 321 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
324 __func__, rc); 322 __func__, rc);
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
326 } 324 }
327 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 325 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
328 if (rc < 0) { 326 if (rc < 0) {
329 dprintk("RPC: %s: wait() exited: %i\n", 327 trace_xprtrdma_conn_tout(xprt);
330 __func__, rc);
331 goto out; 328 goto out;
332 } 329 }
333 330
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
344 } 341 }
345 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 342 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
346 if (rc < 0) { 343 if (rc < 0) {
347 dprintk("RPC: %s: wait() exited: %i\n", 344 trace_xprtrdma_conn_tout(xprt);
348 __func__, rc);
349 goto out; 345 goto out;
350 } 346 }
351 rc = ia->ri_async_rc; 347 rc = ia->ri_async_rc;
@@ -365,19 +361,18 @@ out:
365 361
366/** 362/**
367 * rpcrdma_ia_open - Open and initialize an Interface Adapter. 363 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
368 * @xprt: controlling transport 364 * @xprt: transport with IA to (re)initialize
369 * @addr: IP address of remote peer
370 * 365 *
371 * Returns 0 on success, negative errno if an appropriate 366 * Returns 0 on success, negative errno if an appropriate
372 * Interface Adapter could not be found and opened. 367 * Interface Adapter could not be found and opened.
373 */ 368 */
374int 369int
375rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) 370rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
376{ 371{
377 struct rpcrdma_ia *ia = &xprt->rx_ia; 372 struct rpcrdma_ia *ia = &xprt->rx_ia;
378 int rc; 373 int rc;
379 374
380 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 375 ia->ri_id = rpcrdma_create_id(xprt, ia);
381 if (IS_ERR(ia->ri_id)) { 376 if (IS_ERR(ia->ri_id)) {
382 rc = PTR_ERR(ia->ri_id); 377 rc = PTR_ERR(ia->ri_id);
383 goto out_err; 378 goto out_err;
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
392 } 387 }
393 388
394 switch (xprt_rdma_memreg_strategy) { 389 switch (xprt_rdma_memreg_strategy) {
395 case RPCRDMA_FRMR: 390 case RPCRDMA_FRWR:
396 if (frwr_is_supported(ia)) { 391 if (frwr_is_supported(ia)) {
397 ia->ri_ops = &rpcrdma_frwr_memreg_ops; 392 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
398 break; 393 break;
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
462 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); 457 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
463 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); 458 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
464 } 459 }
465 rpcrdma_destroy_mrs(buf); 460 rpcrdma_mrs_destroy(buf);
466 461
467 /* Allow waiters to continue */ 462 /* Allow waiters to continue */
468 complete(&ia->ri_remove_done); 463 complete(&ia->ri_remove_done);
464
465 trace_xprtrdma_remove(r_xprt);
469} 466}
470 467
471/** 468/**
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
476void 473void
477rpcrdma_ia_close(struct rpcrdma_ia *ia) 474rpcrdma_ia_close(struct rpcrdma_ia *ia)
478{ 475{
479 dprintk("RPC: %s: entering\n", __func__);
480 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { 476 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
481 if (ia->ri_id->qp) 477 if (ia->ri_id->qp)
482 rdma_destroy_qp(ia->ri_id); 478 rdma_destroy_qp(ia->ri_id);
@@ -509,7 +505,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
509 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); 505 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
510 return -ENOMEM; 506 return -ENOMEM;
511 } 507 }
512 ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; 508 ia->ri_max_send_sges = max_sge;
513 509
514 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { 510 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
515 dprintk("RPC: %s: insufficient wqe's available\n", 511 dprintk("RPC: %s: insufficient wqe's available\n",
@@ -630,9 +626,6 @@ out1:
630void 626void
631rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 627rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
632{ 628{
633 dprintk("RPC: %s: entering, connected is %d\n",
634 __func__, ep->rep_connected);
635
636 cancel_delayed_work_sync(&ep->rep_connect_worker); 629 cancel_delayed_work_sync(&ep->rep_connect_worker);
637 630
638 if (ia->ri_id->qp) { 631 if (ia->ri_id->qp) {
@@ -653,13 +646,12 @@ static int
653rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, 646rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
654 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 647 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
655{ 648{
656 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
657 int rc, err; 649 int rc, err;
658 650
659 pr_info("%s: r_xprt = %p\n", __func__, r_xprt); 651 trace_xprtrdma_reinsert(r_xprt);
660 652
661 rc = -EHOSTUNREACH; 653 rc = -EHOSTUNREACH;
662 if (rpcrdma_ia_open(r_xprt, sap)) 654 if (rpcrdma_ia_open(r_xprt))
663 goto out1; 655 goto out1;
664 656
665 rc = -ENOMEM; 657 rc = -ENOMEM;
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
676 goto out3; 668 goto out3;
677 } 669 }
678 670
679 rpcrdma_create_mrs(r_xprt); 671 rpcrdma_mrs_create(r_xprt);
680 return 0; 672 return 0;
681 673
682out3: 674out3:
@@ -691,16 +683,15 @@ static int
691rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, 683rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
692 struct rpcrdma_ia *ia) 684 struct rpcrdma_ia *ia)
693{ 685{
694 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
695 struct rdma_cm_id *id, *old; 686 struct rdma_cm_id *id, *old;
696 int err, rc; 687 int err, rc;
697 688
698 dprintk("RPC: %s: reconnecting...\n", __func__); 689 trace_xprtrdma_reconnect(r_xprt);
699 690
700 rpcrdma_ep_disconnect(ep, ia); 691 rpcrdma_ep_disconnect(ep, ia);
701 692
702 rc = -EHOSTUNREACH; 693 rc = -EHOSTUNREACH;
703 id = rpcrdma_create_id(r_xprt, ia, sap); 694 id = rpcrdma_create_id(r_xprt, ia);
704 if (IS_ERR(id)) 695 if (IS_ERR(id))
705 goto out; 696 goto out;
706 697
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
817 int rc; 808 int rc;
818 809
819 rc = rdma_disconnect(ia->ri_id); 810 rc = rdma_disconnect(ia->ri_id);
820 if (!rc) { 811 if (!rc)
821 /* returns without wait if not connected */ 812 /* returns without wait if not connected */
822 wait_event_interruptible(ep->rep_connect_wait, 813 wait_event_interruptible(ep->rep_connect_wait,
823 ep->rep_connected != 1); 814 ep->rep_connected != 1);
824 dprintk("RPC: %s: after wait, %sconnected\n", __func__, 815 else
825 (ep->rep_connected == 1) ? "still " : "dis");
826 } else {
827 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
828 ep->rep_connected = rc; 816 ep->rep_connected = rc;
829 } 817 trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
818 rx_ep), rc);
830 819
831 ib_drain_qp(ia->ri_id->qp); 820 ib_drain_qp(ia->ri_id->qp);
832} 821}
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
998{ 987{
999 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 988 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
1000 rb_recovery_worker.work); 989 rb_recovery_worker.work);
1001 struct rpcrdma_mw *mw; 990 struct rpcrdma_mr *mr;
1002 991
1003 spin_lock(&buf->rb_recovery_lock); 992 spin_lock(&buf->rb_recovery_lock);
1004 while (!list_empty(&buf->rb_stale_mrs)) { 993 while (!list_empty(&buf->rb_stale_mrs)) {
1005 mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); 994 mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
1006 spin_unlock(&buf->rb_recovery_lock); 995 spin_unlock(&buf->rb_recovery_lock);
1007 996
1008 dprintk("RPC: %s: recovering MR %p\n", __func__, mw); 997 trace_xprtrdma_recover_mr(mr);
1009 mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); 998 mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
1010 999
1011 spin_lock(&buf->rb_recovery_lock); 1000 spin_lock(&buf->rb_recovery_lock);
1012 } 1001 }
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
1014} 1003}
1015 1004
1016void 1005void
1017rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) 1006rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
1018{ 1007{
1019 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 1008 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1020 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1009 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1021 1010
1022 spin_lock(&buf->rb_recovery_lock); 1011 spin_lock(&buf->rb_recovery_lock);
1023 rpcrdma_push_mw(mw, &buf->rb_stale_mrs); 1012 rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
1024 spin_unlock(&buf->rb_recovery_lock); 1013 spin_unlock(&buf->rb_recovery_lock);
1025 1014
1026 schedule_delayed_work(&buf->rb_recovery_worker, 0); 1015 schedule_delayed_work(&buf->rb_recovery_worker, 0);
1027} 1016}
1028 1017
1029static void 1018static void
1030rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) 1019rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
1031{ 1020{
1032 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1021 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1033 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1022 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
1036 LIST_HEAD(all); 1025 LIST_HEAD(all);
1037 1026
1038 for (count = 0; count < 32; count++) { 1027 for (count = 0; count < 32; count++) {
1039 struct rpcrdma_mw *mw; 1028 struct rpcrdma_mr *mr;
1040 int rc; 1029 int rc;
1041 1030
1042 mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1031 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1043 if (!mw) 1032 if (!mr)
1044 break; 1033 break;
1045 1034
1046 rc = ia->ri_ops->ro_init_mr(ia, mw); 1035 rc = ia->ri_ops->ro_init_mr(ia, mr);
1047 if (rc) { 1036 if (rc) {
1048 kfree(mw); 1037 kfree(mr);
1049 break; 1038 break;
1050 } 1039 }
1051 1040
1052 mw->mw_xprt = r_xprt; 1041 mr->mr_xprt = r_xprt;
1053 1042
1054 list_add(&mw->mw_list, &free); 1043 list_add(&mr->mr_list, &free);
1055 list_add(&mw->mw_all, &all); 1044 list_add(&mr->mr_all, &all);
1056 } 1045 }
1057 1046
1058 spin_lock(&buf->rb_mwlock); 1047 spin_lock(&buf->rb_mrlock);
1059 list_splice(&free, &buf->rb_mws); 1048 list_splice(&free, &buf->rb_mrs);
1060 list_splice(&all, &buf->rb_all); 1049 list_splice(&all, &buf->rb_all);
1061 r_xprt->rx_stats.mrs_allocated += count; 1050 r_xprt->rx_stats.mrs_allocated += count;
1062 spin_unlock(&buf->rb_mwlock); 1051 spin_unlock(&buf->rb_mrlock);
1063 1052
1064 dprintk("RPC: %s: created %u MRs\n", __func__, count); 1053 trace_xprtrdma_createmrs(r_xprt, count);
1065} 1054}
1066 1055
1067static void 1056static void
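
[Annotation] rpcrdma_mrs_create() builds the new MRs on two private lists and attaches them to the buffer's rb_mrs and rb_all lists inside one short rb_mrlock critical section, instead of taking the lock once per MR. A userspace sketch of that prepare-off-lock, attach-on-lock shape using a singly linked list; the kernel's list_splice on doubly linked list_heads does the attach in constant time.

#include <assert.h>
#include <stdlib.h>

struct node {
        struct node *next;
};

/* Attach an entire private chain to the shared list in one step;
 * in the kernel, only this step runs under rb_mrlock. */
static void splice(struct node **shared, struct node *chain)
{
        struct node *tail = chain;

        if (!chain)
                return;
        while (tail->next)
                tail = tail->next;
        tail->next = *shared;
        *shared = chain;
}

int main(void)
{
        struct node *shared = NULL, *priv = NULL;
        int count = 0;

        for (int i = 0; i < 32; i++) {  /* allocation done off-lock */
                struct node *n = calloc(1, sizeof(*n));

                if (!n)
                        break;
                n->next = priv;
                priv = n;
        }
        splice(&shared, priv);
        for (struct node *n = shared; n; n = n->next)
                count++;
        assert(count == 32);
        return 0;
}
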
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
1072 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1061 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1073 rx_buf); 1062 rx_buf);
1074 1063
1075 rpcrdma_create_mrs(r_xprt); 1064 rpcrdma_mrs_create(r_xprt);
1076} 1065}
1077 1066
1078struct rpcrdma_req * 1067struct rpcrdma_req *
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1093 return req; 1082 return req;
1094} 1083}
1095 1084
1096struct rpcrdma_rep * 1085/**
1086 * rpcrdma_create_rep - Allocate an rpcrdma_rep object
1087 * @r_xprt: controlling transport
1088 *
1089 * Returns 0 on success or a negative errno on failure.
1090 */
1091int
1097rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) 1092rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1098{ 1093{
1099 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 1094 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1095 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1100 struct rpcrdma_rep *rep; 1096 struct rpcrdma_rep *rep;
1101 int rc; 1097 int rc;
1102 1098
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1121 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1117 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
1122 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1118 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1123 rep->rr_recv_wr.num_sge = 1; 1119 rep->rr_recv_wr.num_sge = 1;
1124 return rep; 1120
1121 spin_lock(&buf->rb_lock);
1122 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1123 spin_unlock(&buf->rb_lock);
1124 return 0;
1125 1125
1126out_free: 1126out_free:
1127 kfree(rep); 1127 kfree(rep);
1128out: 1128out:
1129 return ERR_PTR(rc); 1129 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1130 __func__, rc);
1131 return rc;
1130} 1132}
1131 1133
1132int 1134int
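
[Annotation] rpcrdma_create_rep() switches from returning an ERR_PTR-encoded pointer to returning 0 or a negative errno and adding the new rep to rb_recv_bufs itself, which lets the caller in the next hunk collapse to a plain error check. A userspace sketch of the two conventions; err_ptr/is_err/ptr_err are local stand-ins for the kernel macros.

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

static void *err_ptr(long err)      { return (void *)err; }
static int is_err(const void *p)    { return (uintptr_t)p >= (uintptr_t)-4095; }
static long ptr_err(const void *p)  { return (long)p; }

/* Old convention: object pointer on success, encoded errno on failure. */
static void *create_old(void)
{
        void *rep = malloc(64);

        return rep ? rep : err_ptr(-ENOMEM);
}

/* New convention: 0 or -errno; the object is queued internally. */
static int create_new(void)
{
        void *rep = malloc(64);

        if (!rep)
                return -ENOMEM;
        free(rep);      /* sketch only: no receive list to add it to */
        return 0;
}

int main(void)
{
        void *rep = create_old();

        if (is_err(rep))
                return (int)-ptr_err(rep);
        free(rep);
        return create_new() ? 1 : 0;
}
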
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1137 1139
1138 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1140 buf->rb_max_requests = r_xprt->rx_data.max_requests;
1139 buf->rb_bc_srv_max_requests = 0; 1141 buf->rb_bc_srv_max_requests = 0;
1140 spin_lock_init(&buf->rb_mwlock); 1142 spin_lock_init(&buf->rb_mrlock);
1141 spin_lock_init(&buf->rb_lock); 1143 spin_lock_init(&buf->rb_lock);
1142 spin_lock_init(&buf->rb_recovery_lock); 1144 spin_lock_init(&buf->rb_recovery_lock);
1143 INIT_LIST_HEAD(&buf->rb_mws); 1145 INIT_LIST_HEAD(&buf->rb_mrs);
1144 INIT_LIST_HEAD(&buf->rb_all); 1146 INIT_LIST_HEAD(&buf->rb_all);
1145 INIT_LIST_HEAD(&buf->rb_stale_mrs); 1147 INIT_LIST_HEAD(&buf->rb_stale_mrs);
1146 INIT_DELAYED_WORK(&buf->rb_refresh_worker, 1148 INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1148 INIT_DELAYED_WORK(&buf->rb_recovery_worker, 1150 INIT_DELAYED_WORK(&buf->rb_recovery_worker,
1149 rpcrdma_mr_recovery_worker); 1151 rpcrdma_mr_recovery_worker);
1150 1152
1151 rpcrdma_create_mrs(r_xprt); 1153 rpcrdma_mrs_create(r_xprt);
1152 1154
1153 INIT_LIST_HEAD(&buf->rb_send_bufs); 1155 INIT_LIST_HEAD(&buf->rb_send_bufs);
1154 INIT_LIST_HEAD(&buf->rb_allreqs); 1156 INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1167 } 1169 }
1168 1170
1169 INIT_LIST_HEAD(&buf->rb_recv_bufs); 1171 INIT_LIST_HEAD(&buf->rb_recv_bufs);
1170 for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { 1172 for (i = 0; i <= buf->rb_max_requests; i++) {
1171 struct rpcrdma_rep *rep; 1173 rc = rpcrdma_create_rep(r_xprt);
1172 1174 if (rc)
1173 rep = rpcrdma_create_rep(r_xprt);
1174 if (IS_ERR(rep)) {
1175 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1176 __func__, i);
1177 rc = PTR_ERR(rep);
1178 goto out; 1175 goto out;
1179 }
1180 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1181 } 1176 }
1182 1177
1183 rc = rpcrdma_sendctxs_create(r_xprt); 1178 rc = rpcrdma_sendctxs_create(r_xprt);
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
1229} 1224}
1230 1225
1231static void 1226static void
1232rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) 1227rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1233{ 1228{
1234 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1229 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1235 rx_buf); 1230 rx_buf);
1236 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1231 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1237 struct rpcrdma_mw *mw; 1232 struct rpcrdma_mr *mr;
1238 unsigned int count; 1233 unsigned int count;
1239 1234
1240 count = 0; 1235 count = 0;
1241 spin_lock(&buf->rb_mwlock); 1236 spin_lock(&buf->rb_mrlock);
1242 while (!list_empty(&buf->rb_all)) { 1237 while (!list_empty(&buf->rb_all)) {
1243 mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 1238 mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
1244 list_del(&mw->mw_all); 1239 list_del(&mr->mr_all);
1245 1240
1246 spin_unlock(&buf->rb_mwlock); 1241 spin_unlock(&buf->rb_mrlock);
1247 ia->ri_ops->ro_release_mr(mw); 1242 ia->ri_ops->ro_release_mr(mr);
1248 count++; 1243 count++;
1249 spin_lock(&buf->rb_mwlock); 1244 spin_lock(&buf->rb_mrlock);
1250 } 1245 }
1251 spin_unlock(&buf->rb_mwlock); 1246 spin_unlock(&buf->rb_mrlock);
1252 r_xprt->rx_stats.mrs_allocated = 0; 1247 r_xprt->rx_stats.mrs_allocated = 0;
1253 1248
1254 dprintk("RPC: %s: released %u MRs\n", __func__, count); 1249 dprintk("RPC: %s: released %u MRs\n", __func__, count);
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1285 spin_unlock(&buf->rb_reqslock); 1280 spin_unlock(&buf->rb_reqslock);
1286 buf->rb_recv_count = 0; 1281 buf->rb_recv_count = 0;
1287 1282
1288 rpcrdma_destroy_mrs(buf); 1283 rpcrdma_mrs_destroy(buf);
1289} 1284}
1290 1285
1291struct rpcrdma_mw * 1286/**
1292rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) 1287 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
1288 * @r_xprt: controlling transport
1289 *
1290 * Returns an initialized rpcrdma_mr or NULL if no free
1291 * rpcrdma_mr objects are available.
1292 */
1293struct rpcrdma_mr *
1294rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
1293{ 1295{
1294 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1296 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1295 struct rpcrdma_mw *mw = NULL; 1297 struct rpcrdma_mr *mr = NULL;
1296 1298
1297 spin_lock(&buf->rb_mwlock); 1299 spin_lock(&buf->rb_mrlock);
1298 if (!list_empty(&buf->rb_mws)) 1300 if (!list_empty(&buf->rb_mrs))
1299 mw = rpcrdma_pop_mw(&buf->rb_mws); 1301 mr = rpcrdma_mr_pop(&buf->rb_mrs);
1300 spin_unlock(&buf->rb_mwlock); 1302 spin_unlock(&buf->rb_mrlock);
1301 1303
1302 if (!mw) 1304 if (!mr)
1303 goto out_nomws; 1305 goto out_nomrs;
1304 mw->mw_flags = 0; 1306 return mr;
1305 return mw;
1306 1307
1307out_nomws: 1308out_nomrs:
1308 dprintk("RPC: %s: no MWs available\n", __func__); 1309 trace_xprtrdma_nomrs(r_xprt);
1309 if (r_xprt->rx_ep.rep_connected != -ENODEV) 1310 if (r_xprt->rx_ep.rep_connected != -ENODEV)
1310 schedule_delayed_work(&buf->rb_refresh_worker, 0); 1311 schedule_delayed_work(&buf->rb_refresh_worker, 0);
1311 1312
@@ -1315,14 +1316,39 @@ out_nomws:
1315 return NULL; 1316 return NULL;
1316} 1317}
1317 1318
1319static void
1320__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
1321{
1322 spin_lock(&buf->rb_mrlock);
1323 rpcrdma_mr_push(mr, &buf->rb_mrs);
1324 spin_unlock(&buf->rb_mrlock);
1325}
1326
1327/**
1328 * rpcrdma_mr_put - Release an rpcrdma_mr object
1329 * @mr: object to release
1330 *
1331 */
1318void 1332void
1319rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) 1333rpcrdma_mr_put(struct rpcrdma_mr *mr)
1320{ 1334{
1321 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1335 __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
1336}
1322 1337
1323 spin_lock(&buf->rb_mwlock); 1338/**
1324 rpcrdma_push_mw(mw, &buf->rb_mws); 1339 * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
1325 spin_unlock(&buf->rb_mwlock); 1340 * @mr: object to release
1341 *
1342 */
1343void
1344rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1345{
1346 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1347
1348 trace_xprtrdma_dma_unmap(mr);
1349 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
1350 mr->mr_sg, mr->mr_nents, mr->mr_dir);
1351 __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
1326} 1352}
1327 1353
1328static struct rpcrdma_rep * 1354static struct rpcrdma_rep *
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1359 req = rpcrdma_buffer_get_req_locked(buffers); 1385 req = rpcrdma_buffer_get_req_locked(buffers);
1360 req->rl_reply = rpcrdma_buffer_get_rep(buffers); 1386 req->rl_reply = rpcrdma_buffer_get_rep(buffers);
1361 spin_unlock(&buffers->rb_lock); 1387 spin_unlock(&buffers->rb_lock);
1388
1362 return req; 1389 return req;
1363 1390
1364out_reqbuf: 1391out_reqbuf:
1365 spin_unlock(&buffers->rb_lock); 1392 spin_unlock(&buffers->rb_lock);
1366 pr_warn("RPC: %s: out of request buffers\n", __func__);
1367 return NULL; 1393 return NULL;
1368} 1394}
1369 1395
@@ -1476,6 +1502,9 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
1476static void 1502static void
1477rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) 1503rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
1478{ 1504{
1505 if (!rb)
1506 return;
1507
1479 if (!rpcrdma_regbuf_is_mapped(rb)) 1508 if (!rpcrdma_regbuf_is_mapped(rb))
1480 return; 1509 return;
1481 1510
@@ -1491,9 +1520,6 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
1491void 1520void
1492rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) 1521rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
1493{ 1522{
1494 if (!rb)
1495 return;
1496
1497 rpcrdma_dma_unmap_regbuf(rb); 1523 rpcrdma_dma_unmap_regbuf(rb);
1498 kfree(rb); 1524 kfree(rb);
1499} 1525}
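
The two hunks above move the NULL check out of rpcrdma_free_regbuf() and into rpcrdma_dma_unmap_regbuf(), so every unmap path tolerates a NULL regbuf, not just the free path. A minimal standalone sketch of that guard placement; regbuf_dma_unmap() and regbuf_free() are hypothetical stand-ins, not the kernel functions:

#include <stdlib.h>

struct regbuf {
	void *mapping;		/* non-NULL while DMA-mapped */
};

/* The NULL guard lives in the innermost helper, so every caller
 * (explicit unmap, free, teardown) is covered by one check. */
static void regbuf_dma_unmap(struct regbuf *rb)
{
	if (!rb)
		return;
	if (!rb->mapping)	/* not mapped: nothing to undo */
		return;
	rb->mapping = NULL;	/* stands in for the real DMA unmap */
}

static void regbuf_free(struct regbuf *rb)
{
	regbuf_dma_unmap(rb);	/* safe even when rb == NULL */
	free(rb);		/* free(NULL) is already a no-op */
}

int main(void)
{
	regbuf_free(NULL);	/* no crash: the guard moved inward */
	return 0;
}
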
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1519 req->rl_reply = NULL; 1545 req->rl_reply = NULL;
1520 } 1546 }
1521 1547
1522 dprintk("RPC: %s: posting %d s/g entries\n",
1523 __func__, send_wr->num_sge);
1524
1525 if (!ep->rep_send_count || 1548 if (!ep->rep_send_count ||
1526 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { 1549 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1527 send_wr->send_flags |= IB_SEND_SIGNALED; 1550 send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1530 send_wr->send_flags &= ~IB_SEND_SIGNALED; 1553 send_wr->send_flags &= ~IB_SEND_SIGNALED;
1531 --ep->rep_send_count; 1554 --ep->rep_send_count;
1532 } 1555 }
1556
1533 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1557 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
1558 trace_xprtrdma_post_send(req, rc);
1534 if (rc) 1559 if (rc)
1535 goto out_postsend_err; 1560 return -ENOTCONN;
1536 return 0; 1561 return 0;
1537
1538out_postsend_err:
1539 pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
1540 return -ENOTCONN;
1541} 1562}
1542 1563
1543int 1564int
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1550 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) 1571 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
1551 goto out_map; 1572 goto out_map;
1552 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); 1573 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
1574 trace_xprtrdma_post_recv(rep, rc);
1553 if (rc) 1575 if (rc)
1554 goto out_postrecv; 1576 return -ENOTCONN;
1555 return 0; 1577 return 0;
1556 1578
1557out_map: 1579out_map:
1558 pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); 1580 pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
1559 return -EIO; 1581 return -EIO;
1560
1561out_postrecv:
1562 pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
1563 return -ENOTCONN;
1564} 1582}
1565 1583
1566/** 1584/**
1567 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests 1585 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
1568 * @r_xprt: transport associated with these backchannel resources 1586 * @r_xprt: transport associated with these backchannel resources
1569 * @min_reqs: minimum number of incoming requests expected 1587 * @count: minimum number of incoming requests expected
1570 * 1588 *
1571 * Returns zero if all requested buffers were posted, or a negative errno. 1589 * Returns zero if all requested buffers were posted, or a negative errno.
1572 */ 1590 */
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1594 1612
1595out_reqbuf: 1613out_reqbuf:
1596 spin_unlock(&buffers->rb_lock); 1614 spin_unlock(&buffers->rb_lock);
1597 pr_warn("%s: no extra receive buffers\n", __func__); 1615 trace_xprtrdma_noreps(r_xprt);
1598 return -ENOMEM; 1616 return -ENOMEM;
1599 1617
1600out_rc: 1618out_rc:
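
With the verbs.c hunks above, rpcrdma_create_rep() stops returning a pointer-or-ERR_PTR and instead returns 0 or a negative errno, linking the new rep onto rb_recv_bufs itself; the caller loop in rpcrdma_buffer_create() shrinks accordingly. A self-contained sketch of that calling convention, where rep_create() and buffer_setup() are hypothetical stand-ins and the list handling is simplified to a singly linked list:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct rep {
	struct rep *next;
};

static struct rep *recv_bufs;	/* models buf->rb_recv_bufs */

/* New-style constructor: returns 0 or -errno, and on success the
 * object is already on the receive-buffer list. */
static int rep_create(void)
{
	struct rep *rep = calloc(1, sizeof(*rep));

	if (!rep)
		return -ENOMEM;
	rep->next = recv_bufs;	/* models list_add(&rep->rr_list, ...) */
	recv_bufs = rep;
	return 0;
}

static int buffer_setup(unsigned int max_requests)
{
	unsigned int i;
	int rc;

	/* Mirrors the new loop: "i <= max_requests" allocates one rep
	 * more than the request count. */
	for (i = 0; i <= max_requests; i++) {
		rc = rep_create();
		if (rc)
			return rc;	/* caller sees the errno directly */
	}
	return 0;
}

int main(void)
{
	printf("buffer_setup: %d\n", buffer_setup(32));
	return 0;
}
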
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1342f743f1c4..69883a960a3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,11 +73,10 @@ struct rpcrdma_ia {
73 struct completion ri_remove_done; 73 struct completion ri_remove_done;
74 int ri_async_rc; 74 int ri_async_rc;
75 unsigned int ri_max_segs; 75 unsigned int ri_max_segs;
76 unsigned int ri_max_frmr_depth; 76 unsigned int ri_max_frwr_depth;
77 unsigned int ri_max_inline_write; 77 unsigned int ri_max_inline_write;
78 unsigned int ri_max_inline_read; 78 unsigned int ri_max_inline_read;
79 unsigned int ri_max_send_sges; 79 unsigned int ri_max_send_sges;
80 bool ri_reminv_expected;
81 bool ri_implicit_roundup; 80 bool ri_implicit_roundup;
82 enum ib_mr_type ri_mrtype; 81 enum ib_mr_type ri_mrtype;
83 unsigned long ri_flags; 82 unsigned long ri_flags;
@@ -101,7 +100,6 @@ struct rpcrdma_ep {
101 wait_queue_head_t rep_connect_wait; 100 wait_queue_head_t rep_connect_wait;
102 struct rpcrdma_connect_private rep_cm_private; 101 struct rpcrdma_connect_private rep_cm_private;
103 struct rdma_conn_param rep_remote_cma; 102 struct rdma_conn_param rep_remote_cma;
104 struct sockaddr_storage rep_remote_addr;
105 struct delayed_work rep_connect_worker; 103 struct delayed_work rep_connect_worker;
106}; 104};
107 105
@@ -232,29 +230,29 @@ enum {
232}; 230};
233 231
234/* 232/*
235 * struct rpcrdma_mw - external memory region metadata 233 * struct rpcrdma_mr - external memory region metadata
236 * 234 *
237 * An external memory region is any buffer or page that is registered 235 * An external memory region is any buffer or page that is registered
238 * on the fly (ie, not pre-registered). 236 * on the fly (ie, not pre-registered).
239 * 237 *
240 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During 238 * Each rpcrdma_buffer has a list of free MRs anchored in rb_mrs. During
241 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in 239 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
242 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep 240 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
243 * track of registration metadata while each RPC is pending. 241 * track of registration metadata while each RPC is pending.
244 * rpcrdma_deregister_external() uses this metadata to unmap and 242 * rpcrdma_deregister_external() uses this metadata to unmap and
245 * release these resources when an RPC is complete. 243 * release these resources when an RPC is complete.
246 */ 244 */
247enum rpcrdma_frmr_state { 245enum rpcrdma_frwr_state {
248 FRMR_IS_INVALID, /* ready to be used */ 246 FRWR_IS_INVALID, /* ready to be used */
249 FRMR_IS_VALID, /* in use */ 247 FRWR_IS_VALID, /* in use */
250 FRMR_FLUSHED_FR, /* flushed FASTREG WR */ 248 FRWR_FLUSHED_FR, /* flushed FASTREG WR */
251 FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ 249 FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
252}; 250};
253 251
254struct rpcrdma_frmr { 252struct rpcrdma_frwr {
255 struct ib_mr *fr_mr; 253 struct ib_mr *fr_mr;
256 struct ib_cqe fr_cqe; 254 struct ib_cqe fr_cqe;
257 enum rpcrdma_frmr_state fr_state; 255 enum rpcrdma_frwr_state fr_state;
258 struct completion fr_linv_done; 256 struct completion fr_linv_done;
259 union { 257 union {
260 struct ib_reg_wr fr_regwr; 258 struct ib_reg_wr fr_regwr;
@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
267 u64 *fm_physaddrs; 265 u64 *fm_physaddrs;
268}; 266};
269 267
270struct rpcrdma_mw { 268struct rpcrdma_mr {
271 struct list_head mw_list; 269 struct list_head mr_list;
272 struct scatterlist *mw_sg; 270 struct scatterlist *mr_sg;
273 int mw_nents; 271 int mr_nents;
274 enum dma_data_direction mw_dir; 272 enum dma_data_direction mr_dir;
275 unsigned long mw_flags;
276 union { 273 union {
277 struct rpcrdma_fmr fmr; 274 struct rpcrdma_fmr fmr;
278 struct rpcrdma_frmr frmr; 275 struct rpcrdma_frwr frwr;
279 }; 276 };
280 struct rpcrdma_xprt *mw_xprt; 277 struct rpcrdma_xprt *mr_xprt;
281 u32 mw_handle; 278 u32 mr_handle;
282 u32 mw_length; 279 u32 mr_length;
283 u64 mw_offset; 280 u64 mr_offset;
284 struct list_head mw_all; 281 struct list_head mr_all;
285};
286
287/* mw_flags */
288enum {
289 RPCRDMA_MW_F_RI = 1,
290}; 282};
291 283
292/* 284/*
@@ -362,8 +354,7 @@ struct rpcrdma_req {
362 354
363/* rl_flags */ 355/* rl_flags */
364enum { 356enum {
365 RPCRDMA_REQ_F_BACKCHANNEL = 0, 357 RPCRDMA_REQ_F_PENDING = 0,
366 RPCRDMA_REQ_F_PENDING,
367 RPCRDMA_REQ_F_TX_RESOURCES, 358 RPCRDMA_REQ_F_TX_RESOURCES,
368}; 359};
369 360
@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
374} 365}
375 366
376static inline struct rpcrdma_req * 367static inline struct rpcrdma_req *
377rpcr_to_rdmar(struct rpc_rqst *rqst) 368rpcr_to_rdmar(const struct rpc_rqst *rqst)
378{ 369{
379 return rqst->rq_xprtdata; 370 return rqst->rq_xprtdata;
380} 371}
381 372
382static inline void 373static inline void
383rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) 374rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
384{ 375{
385 list_add_tail(&mw->mw_list, list); 376 list_add_tail(&mr->mr_list, list);
386} 377}
387 378
388static inline struct rpcrdma_mw * 379static inline struct rpcrdma_mr *
389rpcrdma_pop_mw(struct list_head *list) 380rpcrdma_mr_pop(struct list_head *list)
390{ 381{
391 struct rpcrdma_mw *mw; 382 struct rpcrdma_mr *mr;
392 383
393 mw = list_first_entry(list, struct rpcrdma_mw, mw_list); 384 mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
394 list_del(&mw->mw_list); 385 list_del(&mr->mr_list);
395 return mw; 386 return mr;
396} 387}
397 388
398/* 389/*
@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
402 * One of these is associated with a transport instance 393 * One of these is associated with a transport instance
403 */ 394 */
404struct rpcrdma_buffer { 395struct rpcrdma_buffer {
405 spinlock_t rb_mwlock; /* protect rb_mws list */ 396 spinlock_t rb_mrlock; /* protect rb_mrs list */
406 struct list_head rb_mws; 397 struct list_head rb_mrs;
407 struct list_head rb_all; 398 struct list_head rb_all;
408 399
409 unsigned long rb_sc_head; 400 unsigned long rb_sc_head;
@@ -438,13 +429,11 @@ struct rpcrdma_buffer {
438 * This data should be set with mount options 429 * This data should be set with mount options
439 */ 430 */
440struct rpcrdma_create_data_internal { 431struct rpcrdma_create_data_internal {
441 struct sockaddr_storage addr; /* RDMA server address */
442 unsigned int max_requests; /* max requests (slots) in flight */ 432 unsigned int max_requests; /* max requests (slots) in flight */
443 unsigned int rsize; /* mount rsize - max read hdr+data */ 433 unsigned int rsize; /* mount rsize - max read hdr+data */
444 unsigned int wsize; /* mount wsize - max write hdr+data */ 434 unsigned int wsize; /* mount wsize - max write hdr+data */
445 unsigned int inline_rsize; /* max non-rdma read data payload */ 435 unsigned int inline_rsize; /* max non-rdma read data payload */
446 unsigned int inline_wsize; /* max non-rdma write data payload */ 436 unsigned int inline_wsize; /* max non-rdma write data payload */
447 unsigned int padding; /* non-rdma write header padding */
448}; 437};
449 438
450/* 439/*
@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops {
484 struct rpcrdma_mr_seg * 473 struct rpcrdma_mr_seg *
485 (*ro_map)(struct rpcrdma_xprt *, 474 (*ro_map)(struct rpcrdma_xprt *,
486 struct rpcrdma_mr_seg *, int, bool, 475 struct rpcrdma_mr_seg *, int, bool,
487 struct rpcrdma_mw **); 476 struct rpcrdma_mr **);
477 void (*ro_reminv)(struct rpcrdma_rep *rep,
478 struct list_head *mrs);
488 void (*ro_unmap_sync)(struct rpcrdma_xprt *, 479 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
489 struct list_head *); 480 struct list_head *);
490 void (*ro_recover_mr)(struct rpcrdma_mw *); 481 void (*ro_recover_mr)(struct rpcrdma_mr *mr);
491 int (*ro_open)(struct rpcrdma_ia *, 482 int (*ro_open)(struct rpcrdma_ia *,
492 struct rpcrdma_ep *, 483 struct rpcrdma_ep *,
493 struct rpcrdma_create_data_internal *); 484 struct rpcrdma_create_data_internal *);
494 size_t (*ro_maxpages)(struct rpcrdma_xprt *); 485 size_t (*ro_maxpages)(struct rpcrdma_xprt *);
495 int (*ro_init_mr)(struct rpcrdma_ia *, 486 int (*ro_init_mr)(struct rpcrdma_ia *,
496 struct rpcrdma_mw *); 487 struct rpcrdma_mr *);
497 void (*ro_release_mr)(struct rpcrdma_mw *); 488 void (*ro_release_mr)(struct rpcrdma_mr *mr);
498 const char *ro_displayname; 489 const char *ro_displayname;
499 const int ro_send_w_inv_ok; 490 const int ro_send_w_inv_ok;
500}; 491};
@@ -525,6 +516,18 @@ struct rpcrdma_xprt {
525#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) 516#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
526#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) 517#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
527 518
519static inline const char *
520rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
521{
522 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
523}
524
525static inline const char *
526rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
527{
528 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
529}
530
528/* Setting this to 0 ensures interoperability with early servers. 531/* Setting this to 0 ensures interoperability with early servers.
529 * Setting this to 1 enhances certain unaligned read/write performance. 532 * Setting this to 1 enhances certain unaligned read/write performance.
530 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ 533 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
538/* 541/*
539 * Interface Adapter calls - xprtrdma/verbs.c 542 * Interface Adapter calls - xprtrdma/verbs.c
540 */ 543 */
541int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); 544int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
542void rpcrdma_ia_remove(struct rpcrdma_ia *ia); 545void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
543void rpcrdma_ia_close(struct rpcrdma_ia *); 546void rpcrdma_ia_close(struct rpcrdma_ia *);
544bool frwr_is_supported(struct rpcrdma_ia *); 547bool frwr_is_supported(struct rpcrdma_ia *);
@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
564 * Buffer calls - xprtrdma/verbs.c 567 * Buffer calls - xprtrdma/verbs.c
565 */ 568 */
566struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 569struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
567struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
568void rpcrdma_destroy_req(struct rpcrdma_req *); 570void rpcrdma_destroy_req(struct rpcrdma_req *);
571int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
569int rpcrdma_buffer_create(struct rpcrdma_xprt *); 572int rpcrdma_buffer_create(struct rpcrdma_xprt *);
570void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 573void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
571struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 574struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
572void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 575void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
573 576
574struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); 577struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
575void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); 578void rpcrdma_mr_put(struct rpcrdma_mr *mr);
579void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
580void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
581
576struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 582struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
577void rpcrdma_buffer_put(struct rpcrdma_req *); 583void rpcrdma_buffer_put(struct rpcrdma_req *);
578void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 584void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
579void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 585void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
580 586
581void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
582
583struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, 587struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
584 gfp_t); 588 gfp_t);
585bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); 589bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
663size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); 667size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
664int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); 668int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
665void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); 669void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
666int rpcrdma_bc_marshal_reply(struct rpc_rqst *); 670int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
667void xprt_rdma_bc_free_rqst(struct rpc_rqst *); 671void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
668void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); 672void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
669#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 673#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
671extern struct xprt_class xprt_rdma_bc; 675extern struct xprt_class xprt_rdma_bc;
672 676
673#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 677#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
678
679#include <trace/events/rpcrdma.h>
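
The renamed helpers above keep their shape: rpcrdma_mr_push() is list_add_tail() and rpcrdma_mr_pop() is list_first_entry() plus list_del(), so free MRs are recycled in FIFO order. A standalone model of that discipline using a plain head/tail queue instead of the kernel's list_head; mr_push()/mr_pop() here are illustrative, and unlike the kernel helper this pop returns NULL on an empty list:

#include <stdio.h>

struct mr {
	struct mr *next;
	int id;
};

struct mr_list {
	struct mr *head, *tail;
};

/* Producers append at the tail (list_add_tail). */
static void mr_push(struct mr_list *l, struct mr *mr)
{
	mr->next = NULL;
	if (l->tail)
		l->tail->next = mr;
	else
		l->head = mr;
	l->tail = mr;
}

/* Consumers take from the head (list_first_entry + list_del),
 * giving FIFO recycling of MRs. */
static struct mr *mr_pop(struct mr_list *l)
{
	struct mr *mr = l->head;

	if (!mr)
		return NULL;
	l->head = mr->next;
	if (!l->head)
		l->tail = NULL;
	return mr;
}

int main(void)
{
	struct mr a = { .id = 1 }, b = { .id = 2 };
	struct mr_list free_mrs = { NULL, NULL };

	mr_push(&free_mrs, &a);
	mr_push(&free_mrs, &b);
	printf("%d\n", mr_pop(&free_mrs)->id);	/* 1: FIFO order */
	printf("%d\n", mr_pop(&free_mrs)->id);	/* 2 */
	return 0;
}
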
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6d0cc3b8f932..a6b8c1f8f92a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
52 52
53#include "sunrpc.h" 53#include "sunrpc.h"
54 54
55#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
56
55static void xs_close(struct rpc_xprt *xprt); 57static void xs_close(struct rpc_xprt *xprt);
56static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, 58static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
57 struct socket *sock); 59 struct socket *sock);
@@ -805,13 +807,6 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
805 smp_mb__after_atomic(); 807 smp_mb__after_atomic();
806} 808}
807 809
808static void xs_sock_mark_closed(struct rpc_xprt *xprt)
809{
810 xs_sock_reset_connection_flags(xprt);
811 /* Mark transport as closed and wake up all pending tasks */
812 xprt_disconnect_done(xprt);
813}
814
815/** 810/**
816 * xs_error_report - callback to handle TCP socket state errors 811 * xs_error_report - callback to handle TCP socket state errors
817 * @sk: socket 812 * @sk: socket
@@ -831,9 +826,6 @@ static void xs_error_report(struct sock *sk)
831 err = -sk->sk_err; 826 err = -sk->sk_err;
832 if (err == 0) 827 if (err == 0)
833 goto out; 828 goto out;
834 /* Is this a reset event? */
835 if (sk->sk_state == TCP_CLOSE)
836 xs_sock_mark_closed(xprt);
837 dprintk("RPC: xs_error_report client %p, error=%d...\n", 829 dprintk("RPC: xs_error_report client %p, error=%d...\n",
838 xprt, -err); 830 xprt, -err);
839 trace_rpc_socket_error(xprt, sk->sk_socket, err); 831 trace_rpc_socket_error(xprt, sk->sk_socket, err);
@@ -1003,6 +995,7 @@ static void xs_local_data_receive(struct sock_xprt *transport)
1003 struct sock *sk; 995 struct sock *sk;
1004 int err; 996 int err;
1005 997
998restart:
1006 mutex_lock(&transport->recv_mutex); 999 mutex_lock(&transport->recv_mutex);
1007 sk = transport->inet; 1000 sk = transport->inet;
1008 if (sk == NULL) 1001 if (sk == NULL)
@@ -1016,6 +1009,11 @@ static void xs_local_data_receive(struct sock_xprt *transport)
1016 } 1009 }
1017 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1010 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
1018 break; 1011 break;
1012 if (need_resched()) {
1013 mutex_unlock(&transport->recv_mutex);
1014 cond_resched();
1015 goto restart;
1016 }
1019 } 1017 }
1020out: 1018out:
1021 mutex_unlock(&transport->recv_mutex); 1019 mutex_unlock(&transport->recv_mutex);
@@ -1070,18 +1068,18 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
1070 1068
1071 /* Suck it into the iovec, verify checksum if not done by hw. */ 1069 /* Suck it into the iovec, verify checksum if not done by hw. */
1072 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1070 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
1073 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
1074 spin_lock(&xprt->recv_lock); 1071 spin_lock(&xprt->recv_lock);
1072 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
1075 goto out_unpin; 1073 goto out_unpin;
1076 } 1074 }
1077 1075
1078 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
1079 1076
1080 spin_lock_bh(&xprt->transport_lock); 1077 spin_lock_bh(&xprt->transport_lock);
1081 xprt_adjust_cwnd(xprt, task, copied); 1078 xprt_adjust_cwnd(xprt, task, copied);
1082 spin_unlock_bh(&xprt->transport_lock); 1079 spin_unlock_bh(&xprt->transport_lock);
1083 spin_lock(&xprt->recv_lock); 1080 spin_lock(&xprt->recv_lock);
1084 xprt_complete_rqst(task, copied); 1081 xprt_complete_rqst(task, copied);
1082 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
1085out_unpin: 1083out_unpin:
1086 xprt_unpin_rqst(rovr); 1084 xprt_unpin_rqst(rovr);
1087 out_unlock: 1085 out_unlock:
@@ -1094,6 +1092,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
1094 struct sock *sk; 1092 struct sock *sk;
1095 int err; 1093 int err;
1096 1094
1095restart:
1097 mutex_lock(&transport->recv_mutex); 1096 mutex_lock(&transport->recv_mutex);
1098 sk = transport->inet; 1097 sk = transport->inet;
1099 if (sk == NULL) 1098 if (sk == NULL)
@@ -1107,6 +1106,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
1107 } 1106 }
1108 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1107 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
1109 break; 1108 break;
1109 if (need_resched()) {
1110 mutex_unlock(&transport->recv_mutex);
1111 cond_resched();
1112 goto restart;
1113 }
1110 } 1114 }
1111out: 1115out:
1112 mutex_unlock(&transport->recv_mutex); 1116 mutex_unlock(&transport->recv_mutex);
@@ -1479,6 +1483,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
1479 .offset = offset, 1483 .offset = offset,
1480 .count = len, 1484 .count = len,
1481 }; 1485 };
1486 size_t ret;
1482 1487
1483 dprintk("RPC: xs_tcp_data_recv started\n"); 1488 dprintk("RPC: xs_tcp_data_recv started\n");
1484 do { 1489 do {
@@ -1507,9 +1512,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
1507 /* Skip over any trailing bytes on short reads */ 1512 /* Skip over any trailing bytes on short reads */
1508 xs_tcp_read_discard(transport, &desc); 1513 xs_tcp_read_discard(transport, &desc);
1509 } while (desc.count); 1514 } while (desc.count);
1515 ret = len - desc.count;
1516 if (ret < rd_desc->count)
1517 rd_desc->count -= ret;
1518 else
1519 rd_desc->count = 0;
1510 trace_xs_tcp_data_recv(transport); 1520 trace_xs_tcp_data_recv(transport);
1511 dprintk("RPC: xs_tcp_data_recv done\n"); 1521 dprintk("RPC: xs_tcp_data_recv done\n");
1512 return len - desc.count; 1522 return ret;
1513} 1523}
1514 1524
1515static void xs_tcp_data_receive(struct sock_xprt *transport) 1525static void xs_tcp_data_receive(struct sock_xprt *transport)
@@ -1517,30 +1527,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
1517 struct rpc_xprt *xprt = &transport->xprt; 1527 struct rpc_xprt *xprt = &transport->xprt;
1518 struct sock *sk; 1528 struct sock *sk;
1519 read_descriptor_t rd_desc = { 1529 read_descriptor_t rd_desc = {
1520 .count = 2*1024*1024,
1521 .arg.data = xprt, 1530 .arg.data = xprt,
1522 }; 1531 };
1523 unsigned long total = 0; 1532 unsigned long total = 0;
1524 int loop;
1525 int read = 0; 1533 int read = 0;
1526 1534
1535restart:
1527 mutex_lock(&transport->recv_mutex); 1536 mutex_lock(&transport->recv_mutex);
1528 sk = transport->inet; 1537 sk = transport->inet;
1529 if (sk == NULL) 1538 if (sk == NULL)
1530 goto out; 1539 goto out;
1531 1540
1532 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1541 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
1533 for (loop = 0; loop < 64; loop++) { 1542 for (;;) {
1543 rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
1534 lock_sock(sk); 1544 lock_sock(sk);
1535 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); 1545 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
1536 if (read <= 0) { 1546 if (rd_desc.count != 0 || read < 0) {
1537 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); 1547 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
1538 release_sock(sk); 1548 release_sock(sk);
1539 break; 1549 break;
1540 } 1550 }
1541 release_sock(sk); 1551 release_sock(sk);
1542 total += read; 1552 total += read;
1543 rd_desc.count = 65536; 1553 if (need_resched()) {
1554 mutex_unlock(&transport->recv_mutex);
1555 cond_resched();
1556 goto restart;
1557 }
1544 } 1558 }
1545 if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1559 if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
1546 queue_work(xprtiod_workqueue, &transport->recv_worker); 1560 queue_work(xprtiod_workqueue, &transport->recv_worker);
@@ -1631,9 +1645,11 @@ static void xs_tcp_state_change(struct sock *sk)
1631 if (test_and_clear_bit(XPRT_SOCK_CONNECTING, 1645 if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
1632 &transport->sock_state)) 1646 &transport->sock_state))
1633 xprt_clear_connecting(xprt); 1647 xprt_clear_connecting(xprt);
1648 clear_bit(XPRT_CLOSING, &xprt->state);
1634 if (sk->sk_err) 1649 if (sk->sk_err)
1635 xprt_wake_pending_tasks(xprt, -sk->sk_err); 1650 xprt_wake_pending_tasks(xprt, -sk->sk_err);
1636 xs_sock_mark_closed(xprt); 1651 /* Trigger the socket release */
1652 xs_tcp_force_close(xprt);
1637 } 1653 }
1638 out: 1654 out:
1639 read_unlock_bh(&sk->sk_callback_lock); 1655 read_unlock_bh(&sk->sk_callback_lock);
@@ -2241,14 +2257,19 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
2241{ 2257{
2242 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2258 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2243 struct socket *sock = transport->sock; 2259 struct socket *sock = transport->sock;
2260 int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE;
2244 2261
2245 if (sock == NULL) 2262 if (sock == NULL)
2246 return; 2263 return;
2247 if (xprt_connected(xprt)) { 2264 switch (skst) {
2265 default:
2248 kernel_sock_shutdown(sock, SHUT_RDWR); 2266 kernel_sock_shutdown(sock, SHUT_RDWR);
2249 trace_rpc_socket_shutdown(xprt, sock); 2267 trace_rpc_socket_shutdown(xprt, sock);
2250 } else 2268 break;
2269 case TCP_CLOSE:
2270 case TCP_TIME_WAIT:
2251 xs_reset_transport(transport); 2271 xs_reset_transport(transport);
2272 }
2252} 2273}
2253 2274
2254static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, 2275static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
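
The xs_tcp_data_receive() rework above replaces the fixed 64-iteration loop with an unbounded one that reads at most RPC_TCP_READ_CHUNK_SZ (3*512*1024 bytes) per pass and, when the scheduler wants the CPU, drops recv_mutex, calls cond_resched(), and restarts. A schematic, single-threaded model of that loop shape; socket_read(), need_resched_stub(), and cond_resched_stub() are stand-ins, not kernel APIs:

#include <stdbool.h>
#include <stdio.h>

#define READ_CHUNK_SZ (3 * 512 * 1024)	/* mirrors RPC_TCP_READ_CHUNK_SZ */

static long pending = 4L * 1024 * 1024;	/* bytes "queued" on the fake socket */

/* Stand-in for tcp_read_sock(): consume up to 'budget' bytes. */
static long socket_read(long budget)
{
	long n = pending < budget ? pending : budget;

	pending -= n;
	return n;
}

/* Scheduler hooks; always "no" here so the demo terminates. */
static bool need_resched_stub(void) { return false; }
static void cond_resched_stub(void) { }

static long data_receive(void)
{
	long total = 0;
	long read;

restart:
	/* mutex_lock(&transport->recv_mutex) would go here */
	for (;;) {
		read = socket_read(READ_CHUNK_SZ);
		total += read;
		if (read < READ_CHUNK_SZ)
			break;		/* budget left over: socket drained */
		if (need_resched_stub()) {
			/* drop the lock and yield instead of hogging the CPU */
			cond_resched_stub();
			goto restart;
		}
	}
	/* mutex_unlock(&transport->recv_mutex) would go here */
	return total;
}

int main(void)
{
	printf("received %ld bytes\n", data_receive());
	return 0;
}
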
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 329325bd553e..37892b3909af 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/bcast.c: TIPC broadcast code 2 * net/tipc/bcast.c: TIPC broadcast code
3 * 3 *
4 * Copyright (c) 2004-2006, 2014-2016, Ericsson AB 4 * Copyright (c) 2004-2006, 2014-2017, Ericsson AB
5 * Copyright (c) 2004, Intel Corporation. 5 * Copyright (c) 2004, Intel Corporation.
6 * Copyright (c) 2005, 2010-2011, Wind River Systems 6 * Copyright (c) 2005, 2010-2011, Wind River Systems
7 * All rights reserved. 7 * All rights reserved.
@@ -42,8 +42,8 @@
42#include "link.h" 42#include "link.h"
43#include "name_table.h" 43#include "name_table.h"
44 44
45#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ 45#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
46#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ 46#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
47 47
48const char tipc_bclink_name[] = "broadcast-link"; 48const char tipc_bclink_name[] = "broadcast-link";
49 49
@@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net)
74 return tipc_net(net)->bcbase; 74 return tipc_net(net)->bcbase;
75} 75}
76 76
77/* tipc_bcast_get_mtu(): get the MTU currently used by broadcast link
78 * Note: the MTU is decremented to give room for a tunnel header, in
79 * case the message needs to be sent as replicast
80 */
77int tipc_bcast_get_mtu(struct net *net) 81int tipc_bcast_get_mtu(struct net *net)
78{ 82{
79 return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; 83 return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
@@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net)
515 spin_lock_init(&tipc_net(net)->bclock); 519 spin_lock_init(&tipc_net(net)->bclock);
516 520
517 if (!tipc_link_bc_create(net, 0, 0, 521 if (!tipc_link_bc_create(net, 0, 0,
518 U16_MAX, 522 FB_MTU,
519 BCLINK_WIN_DEFAULT, 523 BCLINK_WIN_DEFAULT,
520 0, 524 0,
521 &bb->inputq, 525 &bb->inputq,
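
The new tipc_bcast_get_mtu() comment above explains the INT_H_SIZE subtraction: room is reserved for a tunnel header in case the message must go out as replicast. Assuming TIPC's 40-byte internal header size (an assumption; see INT_H_SIZE in net/tipc/msg.h), the arithmetic is simply:

#include <stdio.h>

#define INT_H_SIZE 40	/* assumed TIPC internal header size */

/* Models tipc_bcast_get_mtu(): leave tunnel-header headroom. */
static int bcast_get_mtu(int link_mtu)
{
	return link_mtu - INT_H_SIZE;
}

int main(void)
{
	/* e.g. a 1500-byte link MTU leaves 1460 bytes of payload */
	printf("%d\n", bcast_get_mtu(1500));
	return 0;
}
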
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 964342689f2c..20b21af2ff14 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -49,7 +49,6 @@
49#include <linux/uaccess.h> 49#include <linux/uaccess.h>
50#include <linux/interrupt.h> 50#include <linux/interrupt.h>
51#include <linux/atomic.h> 51#include <linux/atomic.h>
52#include <asm/hardirq.h>
53#include <linux/netdevice.h> 52#include <linux/netdevice.h>
54#include <linux/in.h> 53#include <linux/in.h>
55#include <linux/list.h> 54#include <linux/list.h>
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 5f4ffae807ee..122162a31816 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -49,8 +49,6 @@
49#define ADV_ACTIVE (ADV_UNIT * 12) 49#define ADV_ACTIVE (ADV_UNIT * 12)
50 50
51enum mbr_state { 51enum mbr_state {
52 MBR_QUARANTINED,
53 MBR_DISCOVERED,
54 MBR_JOINING, 52 MBR_JOINING,
55 MBR_PUBLISHED, 53 MBR_PUBLISHED,
56 MBR_JOINED, 54 MBR_JOINED,
@@ -64,8 +62,7 @@ enum mbr_state {
64struct tipc_member { 62struct tipc_member {
65 struct rb_node tree_node; 63 struct rb_node tree_node;
66 struct list_head list; 64 struct list_head list;
67 struct list_head congested; 65 struct list_head small_win;
68 struct sk_buff *event_msg;
69 struct sk_buff_head deferredq; 66 struct sk_buff_head deferredq;
70 struct tipc_group *group; 67 struct tipc_group *group;
71 u32 node; 68 u32 node;
@@ -77,21 +74,18 @@ struct tipc_member {
77 u16 bc_rcv_nxt; 74 u16 bc_rcv_nxt;
78 u16 bc_syncpt; 75 u16 bc_syncpt;
79 u16 bc_acked; 76 u16 bc_acked;
80 bool usr_pending;
81}; 77};
82 78
83struct tipc_group { 79struct tipc_group {
84 struct rb_root members; 80 struct rb_root members;
85 struct list_head congested; 81 struct list_head small_win;
86 struct list_head pending; 82 struct list_head pending;
87 struct list_head active; 83 struct list_head active;
88 struct list_head reclaiming;
89 struct tipc_nlist dests; 84 struct tipc_nlist dests;
90 struct net *net; 85 struct net *net;
91 int subid; 86 int subid;
92 u32 type; 87 u32 type;
93 u32 instance; 88 u32 instance;
94 u32 domain;
95 u32 scope; 89 u32 scope;
96 u32 portid; 90 u32 portid;
97 u16 member_cnt; 91 u16 member_cnt;
@@ -99,6 +93,7 @@ struct tipc_group {
99 u16 max_active; 93 u16 max_active;
100 u16 bc_snd_nxt; 94 u16 bc_snd_nxt;
101 u16 bc_ackers; 95 u16 bc_ackers;
96 bool *open;
102 bool loopback; 97 bool loopback;
103 bool events; 98 bool events;
104}; 99};
@@ -106,6 +101,16 @@ struct tipc_group {
106static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, 101static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
107 int mtyp, struct sk_buff_head *xmitq); 102 int mtyp, struct sk_buff_head *xmitq);
108 103
104static void tipc_group_open(struct tipc_member *m, bool *wakeup)
105{
106 *wakeup = false;
107 if (list_empty(&m->small_win))
108 return;
109 list_del_init(&m->small_win);
110 *m->group->open = true;
111 *wakeup = true;
112}
113
109static void tipc_group_decr_active(struct tipc_group *grp, 114static void tipc_group_decr_active(struct tipc_group *grp,
110 struct tipc_member *m) 115 struct tipc_member *m)
111{ 116{
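
tipc_group_open(), added above, is the single point where a member leaves the small_win list: it delists the member, flips the group-wide *open flag shared with the socket, and tells the caller whether to wake the user. A toy single-member model of that handshake, where struct member and group_open() are simplified stand-ins for the kernel structures:

#include <stdbool.h>
#include <stdio.h>

struct member {
	bool listed;		/* models !list_empty(&m->small_win) */
	bool *group_open;	/* shared with the "socket" */
};

static void group_open(struct member *m, bool *wakeup)
{
	*wakeup = false;
	if (!m->listed)		/* list_empty(&m->small_win) */
		return;
	m->listed = false;	/* list_del_init(&m->small_win) */
	*m->group_open = true;	/* *m->group->open = true */
	*wakeup = true;		/* caller wakes the socket */
}

int main(void)
{
	bool open = false, wakeup;
	struct member m = { .listed = true, .group_open = &open };

	group_open(&m, &wakeup);
	printf("open=%d wakeup=%d\n", open, wakeup);
	return 0;
}
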
@@ -137,14 +142,14 @@ u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
137 return grp->bc_snd_nxt; 142 return grp->bc_snd_nxt;
138} 143}
139 144
140static bool tipc_group_is_enabled(struct tipc_member *m) 145static bool tipc_group_is_receiver(struct tipc_member *m)
141{ 146{
142 return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING; 147 return m && m->state != MBR_JOINING && m->state != MBR_LEAVING;
143} 148}
144 149
145static bool tipc_group_is_receiver(struct tipc_member *m) 150static bool tipc_group_is_sender(struct tipc_member *m)
146{ 151{
147 return m && m->state >= MBR_JOINED; 152 return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED;
148} 153}
149 154
150u32 tipc_group_exclude(struct tipc_group *grp) 155u32 tipc_group_exclude(struct tipc_group *grp)
@@ -160,8 +165,11 @@ int tipc_group_size(struct tipc_group *grp)
160} 165}
161 166
162struct tipc_group *tipc_group_create(struct net *net, u32 portid, 167struct tipc_group *tipc_group_create(struct net *net, u32 portid,
163 struct tipc_group_req *mreq) 168 struct tipc_group_req *mreq,
169 bool *group_is_open)
164{ 170{
171 u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS;
172 bool global = mreq->scope != TIPC_NODE_SCOPE;
165 struct tipc_group *grp; 173 struct tipc_group *grp;
166 u32 type = mreq->type; 174 u32 type = mreq->type;
167 175
@@ -169,25 +177,41 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
169 if (!grp) 177 if (!grp)
170 return NULL; 178 return NULL;
171 tipc_nlist_init(&grp->dests, tipc_own_addr(net)); 179 tipc_nlist_init(&grp->dests, tipc_own_addr(net));
172 INIT_LIST_HEAD(&grp->congested); 180 INIT_LIST_HEAD(&grp->small_win);
173 INIT_LIST_HEAD(&grp->active); 181 INIT_LIST_HEAD(&grp->active);
174 INIT_LIST_HEAD(&grp->pending); 182 INIT_LIST_HEAD(&grp->pending);
175 INIT_LIST_HEAD(&grp->reclaiming);
176 grp->members = RB_ROOT; 183 grp->members = RB_ROOT;
177 grp->net = net; 184 grp->net = net;
178 grp->portid = portid; 185 grp->portid = portid;
179 grp->domain = addr_domain(net, mreq->scope);
180 grp->type = type; 186 grp->type = type;
181 grp->instance = mreq->instance; 187 grp->instance = mreq->instance;
182 grp->scope = mreq->scope; 188 grp->scope = mreq->scope;
183 grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; 189 grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
184 grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; 190 grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
185 if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid)) 191 grp->open = group_is_open;
192 filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE;
193 if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0,
194 filter, &grp->subid))
186 return grp; 195 return grp;
187 kfree(grp); 196 kfree(grp);
188 return NULL; 197 return NULL;
189} 198}
190 199
200void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf)
201{
202 struct rb_root *tree = &grp->members;
203 struct tipc_member *m, *tmp;
204 struct sk_buff_head xmitq;
205
206 skb_queue_head_init(&xmitq);
207 rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
208 tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
209 tipc_group_update_member(m, 0);
210 }
211 tipc_node_distr_xmit(net, &xmitq);
212 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
213}
214
191void tipc_group_delete(struct net *net, struct tipc_group *grp) 215void tipc_group_delete(struct net *net, struct tipc_group *grp)
192{ 216{
193 struct rb_root *tree = &grp->members; 217 struct rb_root *tree = &grp->members;
@@ -233,7 +257,7 @@ static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
233 struct tipc_member *m; 257 struct tipc_member *m;
234 258
235 m = tipc_group_find_member(grp, node, port); 259 m = tipc_group_find_member(grp, node, port);
236 if (m && tipc_group_is_enabled(m)) 260 if (m && tipc_group_is_receiver(m))
237 return m; 261 return m;
238 return NULL; 262 return NULL;
239} 263}
@@ -278,7 +302,7 @@ static void tipc_group_add_to_tree(struct tipc_group *grp,
278 302
279static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, 303static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
280 u32 node, u32 port, 304 u32 node, u32 port,
281 int state) 305 u32 instance, int state)
282{ 306{
283 struct tipc_member *m; 307 struct tipc_member *m;
284 308
@@ -286,11 +310,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
286 if (!m) 310 if (!m)
287 return NULL; 311 return NULL;
288 INIT_LIST_HEAD(&m->list); 312 INIT_LIST_HEAD(&m->list);
289 INIT_LIST_HEAD(&m->congested); 313 INIT_LIST_HEAD(&m->small_win);
290 __skb_queue_head_init(&m->deferredq); 314 __skb_queue_head_init(&m->deferredq);
291 m->group = grp; 315 m->group = grp;
292 m->node = node; 316 m->node = node;
293 m->port = port; 317 m->port = port;
318 m->instance = instance;
294 m->bc_acked = grp->bc_snd_nxt - 1; 319 m->bc_acked = grp->bc_snd_nxt - 1;
295 grp->member_cnt++; 320 grp->member_cnt++;
296 tipc_group_add_to_tree(grp, m); 321 tipc_group_add_to_tree(grp, m);
@@ -299,9 +324,10 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
299 return m; 324 return m;
300} 325}
301 326
302void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port) 327void tipc_group_add_member(struct tipc_group *grp, u32 node,
328 u32 port, u32 instance)
303{ 329{
304 tipc_group_create_member(grp, node, port, MBR_DISCOVERED); 330 tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED);
305} 331}
306 332
307static void tipc_group_delete_member(struct tipc_group *grp, 333static void tipc_group_delete_member(struct tipc_group *grp,
@@ -315,7 +341,7 @@ static void tipc_group_delete_member(struct tipc_group *grp,
315 grp->bc_ackers--; 341 grp->bc_ackers--;
316 342
317 list_del_init(&m->list); 343 list_del_init(&m->list);
318 list_del_init(&m->congested); 344 list_del_init(&m->small_win);
319 tipc_group_decr_active(grp, m); 345 tipc_group_decr_active(grp, m);
320 346
321 /* If last member on a node, remove node from dest list */ 347 /* If last member on a node, remove node from dest list */
@@ -344,7 +370,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
344 struct tipc_group *grp = m->group; 370 struct tipc_group *grp = m->group;
345 struct tipc_member *_m, *tmp; 371 struct tipc_member *_m, *tmp;
346 372
347 if (!tipc_group_is_enabled(m)) 373 if (!tipc_group_is_receiver(m))
348 return; 374 return;
349 375
350 m->window -= len; 376 m->window -= len;
@@ -352,16 +378,14 @@ void tipc_group_update_member(struct tipc_member *m, int len)
352 if (m->window >= ADV_IDLE) 378 if (m->window >= ADV_IDLE)
353 return; 379 return;
354 380
355 list_del_init(&m->congested); 381 list_del_init(&m->small_win);
356 382
357 /* Sort member into congested members' list */ 383 /* Sort member into small window members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { 384 list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) {
359 if (m->window > _m->window) 385 if (_m->window > m->window)
360 continue; 386 break;
361 list_add_tail(&m->congested, &_m->congested);
362 return;
363 } 387 }
364 list_add_tail(&m->congested, &grp->congested); 388 list_add_tail(&m->small_win, &_m->small_win);
365} 389}
366 390
367void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) 391void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
@@ -373,7 +397,7 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
373 397
374 for (n = rb_first(&grp->members); n; n = rb_next(n)) { 398 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
375 m = container_of(n, struct tipc_member, tree_node); 399 m = container_of(n, struct tipc_member, tree_node);
376 if (tipc_group_is_enabled(m)) { 400 if (tipc_group_is_receiver(m)) {
377 tipc_group_update_member(m, len); 401 tipc_group_update_member(m, len);
378 m->bc_acked = prev; 402 m->bc_acked = prev;
379 ackers++; 403 ackers++;
@@ -394,20 +418,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
394 int adv, state; 418 int adv, state;
395 419
396 m = tipc_group_find_dest(grp, dnode, dport); 420 m = tipc_group_find_dest(grp, dnode, dport);
397 *mbr = m; 421 if (!tipc_group_is_receiver(m)) {
398 if (!m) 422 *mbr = NULL;
399 return false; 423 return false;
400 if (m->usr_pending) 424 }
401 return true; 425 *mbr = m;
426
402 if (m->window >= len) 427 if (m->window >= len)
403 return false; 428 return false;
404 m->usr_pending = true; 429
430 *grp->open = false;
405 431
406 /* If not fully advertised, do it now to prevent mutual blocking */ 432 /* If not fully advertised, do it now to prevent mutual blocking */
407 adv = m->advertised; 433 adv = m->advertised;
408 state = m->state; 434 state = m->state;
409 if (state < MBR_JOINED)
410 return true;
411 if (state == MBR_JOINED && adv == ADV_IDLE) 435 if (state == MBR_JOINED && adv == ADV_IDLE)
412 return true; 436 return true;
413 if (state == MBR_ACTIVE && adv == ADV_ACTIVE) 437 if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
@@ -425,13 +449,14 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len)
425 struct tipc_member *m = NULL; 449 struct tipc_member *m = NULL;
426 450
427 /* If prev bcast was replicast, reject until all receivers have acked */ 451 /* If prev bcast was replicast, reject until all receivers have acked */
428 if (grp->bc_ackers) 452 if (grp->bc_ackers) {
453 *grp->open = false;
429 return true; 454 return true;
430 455 }
431 if (list_empty(&grp->congested)) 456 if (list_empty(&grp->small_win))
432 return false; 457 return false;
433 458
434 m = list_first_entry(&grp->congested, struct tipc_member, congested); 459 m = list_first_entry(&grp->small_win, struct tipc_member, small_win);
435 if (m->window >= len) 460 if (m->window >= len)
436 return false; 461 return false;
437 462
@@ -486,7 +511,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
486 goto drop; 511 goto drop;
487 512
488 m = tipc_group_find_member(grp, node, port); 513 m = tipc_group_find_member(grp, node, port);
489 if (!tipc_group_is_receiver(m)) 514 if (!tipc_group_is_sender(m))
490 goto drop; 515 goto drop;
491 516
492 if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) 517 if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
@@ -573,24 +598,34 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
573 598
574 switch (m->state) { 599 switch (m->state) {
575 case MBR_JOINED: 600 case MBR_JOINED:
576 /* Reclaim advertised space from least active member */ 601 /* First, decide if member can go active */
577 if (!list_empty(active) && active_cnt >= reclaim_limit) { 602 if (active_cnt <= max_active) {
603 m->state = MBR_ACTIVE;
604 list_add_tail(&m->list, active);
605 grp->active_cnt++;
606 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
607 } else {
608 m->state = MBR_PENDING;
609 list_add_tail(&m->list, &grp->pending);
610 }
611
612 if (active_cnt < reclaim_limit)
613 break;
614
615 /* Reclaim from oldest active member, if possible */
616 if (!list_empty(active)) {
578 rm = list_first_entry(active, struct tipc_member, list); 617 rm = list_first_entry(active, struct tipc_member, list);
579 rm->state = MBR_RECLAIMING; 618 rm->state = MBR_RECLAIMING;
580 list_move_tail(&rm->list, &grp->reclaiming); 619 list_del_init(&rm->list);
581 tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); 620 tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
582 }
583 /* If max active, become pending and wait for reclaimed space */
584 if (active_cnt >= max_active) {
585 m->state = MBR_PENDING;
586 list_add_tail(&m->list, &grp->pending);
587 break; 621 break;
588 } 622 }
589 /* Otherwise become active */ 623 /* Nobody to reclaim from; revert oldest pending to JOINED */
590 m->state = MBR_ACTIVE; 624 pm = list_first_entry(&grp->pending, struct tipc_member, list);
591 list_add_tail(&m->list, &grp->active); 625 list_del_init(&pm->list);
592 grp->active_cnt++; 626 pm->state = MBR_JOINED;
593 /* Fall through */ 627 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
628 break;
594 case MBR_ACTIVE: 629 case MBR_ACTIVE:
595 if (!list_is_last(&m->list, &grp->active)) 630 if (!list_is_last(&m->list, &grp->active))
596 list_move_tail(&m->list, &grp->active); 631 list_move_tail(&m->list, &grp->active);
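
The reworked MBR_JOINED branch above decides a new member's class immediately: it goes MBR_ACTIVE while active_cnt <= max_active, otherwise MBR_PENDING; and once active_cnt reaches reclaim_limit, the group reclaims advertised space from the oldest active member or, with nobody to reclaim from, reverts the oldest pending member to MBR_JOINED. A toy model of just the admission decision (the reclaim side is covered by the hunk above and omitted here); struct group and admit() are simplified stand-ins:

#include <stdio.h>

enum state { MBR_JOINED, MBR_ACTIVE, MBR_PENDING };

struct group {
	int active_cnt;
	int max_active;
};

/* Mirrors the patch's test "(active_cnt <= max_active)"; note this
 * sketch admits max_active + 1 members before queuing as pending. */
static enum state admit(struct group *g)
{
	if (g->active_cnt <= g->max_active) {
		g->active_cnt++;
		return MBR_ACTIVE;
	}
	return MBR_PENDING;
}

int main(void)
{
	struct group g = { .active_cnt = 0, .max_active = 2 };
	int i;

	for (i = 0; i < 5; i++)
		printf("member %d -> %s\n", i,
		       admit(&g) == MBR_ACTIVE ? "ACTIVE" : "PENDING");
	return 0;
}
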
@@ -602,12 +637,12 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
602 if (m->advertised > ADV_IDLE) 637 if (m->advertised > ADV_IDLE)
603 break; 638 break;
604 m->state = MBR_JOINED; 639 m->state = MBR_JOINED;
640 grp->active_cnt--;
605 if (m->advertised < ADV_IDLE) { 641 if (m->advertised < ADV_IDLE) {
606 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); 642 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
607 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); 643 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
608 } 644 }
609 grp->active_cnt--; 645
610 list_del_init(&m->list);
611 if (list_empty(&grp->pending)) 646 if (list_empty(&grp->pending))
612 return; 647 return;
613 648
@@ -619,7 +654,6 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
619 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); 654 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
620 break; 655 break;
621 case MBR_RECLAIMING: 656 case MBR_RECLAIMING:
622 case MBR_DISCOVERED:
623 case MBR_JOINING: 657 case MBR_JOINING:
624 case MBR_LEAVING: 658 case MBR_LEAVING:
625 default: 659 default:
@@ -627,6 +661,40 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
627 } 661 }
628} 662}
629 663
664static void tipc_group_create_event(struct tipc_group *grp,
665 struct tipc_member *m,
666 u32 event, u16 seqno,
667 struct sk_buff_head *inputq)
668{ u32 dnode = tipc_own_addr(grp->net);
669 struct tipc_event evt;
670 struct sk_buff *skb;
671 struct tipc_msg *hdr;
672
673 evt.event = event;
674 evt.found_lower = m->instance;
675 evt.found_upper = m->instance;
676 evt.port.ref = m->port;
677 evt.port.node = m->node;
678 evt.s.seq.type = grp->type;
679 evt.s.seq.lower = m->instance;
680 evt.s.seq.upper = m->instance;
681
682 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT,
683 GROUP_H_SIZE, sizeof(evt), dnode, m->node,
684 grp->portid, m->port, 0);
685 if (!skb)
686 return;
687
688 hdr = buf_msg(skb);
689 msg_set_nametype(hdr, grp->type);
690 msg_set_grp_evt(hdr, event);
691 msg_set_dest_droppable(hdr, true);
692 msg_set_grp_bc_seqno(hdr, seqno);
693 memcpy(msg_data(hdr), &evt, sizeof(evt));
694 TIPC_SKB_CB(skb)->orig_member = m->instance;
695 __skb_queue_tail(inputq, skb);
696}
697
630static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, 698static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
631 int mtyp, struct sk_buff_head *xmitq) 699 int mtyp, struct sk_buff_head *xmitq)
632{ 700{
@@ -672,83 +740,73 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 	u32 node = msg_orignode(hdr);
 	u32 port = msg_origport(hdr);
 	struct tipc_member *m, *pm;
-	struct tipc_msg *ehdr;
 	u16 remitted, in_flight;
 
 	if (!grp)
 		return;
 
+	if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net))
+		return;
+
 	m = tipc_group_find_member(grp, node, port);
 
 	switch (msg_type(hdr)) {
 	case GRP_JOIN_MSG:
 		if (!m)
 			m = tipc_group_create_member(grp, node, port,
-						     MBR_QUARANTINED);
+						     0, MBR_JOINING);
 		if (!m)
 			return;
 		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
 		m->bc_rcv_nxt = m->bc_syncpt;
 		m->window += msg_adv_win(hdr);
 
-		/* Wait until PUBLISH event is received */
-		if (m->state == MBR_DISCOVERED) {
-			m->state = MBR_JOINING;
-		} else if (m->state == MBR_PUBLISHED) {
-			m->state = MBR_JOINED;
-			*usr_wakeup = true;
-			m->usr_pending = false;
-			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
-			ehdr = buf_msg(m->event_msg);
-			msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
-			__skb_queue_tail(inputq, m->event_msg);
-		}
-		list_del_init(&m->congested);
+		/* Wait until PUBLISH event is received if necessary */
+		if (m->state != MBR_PUBLISHED)
+			return;
+
+		/* Member can be taken into service */
+		m->state = MBR_JOINED;
+		tipc_group_open(m, usr_wakeup);
 		tipc_group_update_member(m, 0);
+		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+		tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+					m->bc_syncpt, inputq);
 		return;
 	case GRP_LEAVE_MSG:
 		if (!m)
 			return;
 		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
 		list_del_init(&m->list);
-		list_del_init(&m->congested);
-		*usr_wakeup = true;
-
-		/* Wait until WITHDRAW event is received */
-		if (m->state != MBR_LEAVING) {
-			tipc_group_decr_active(grp, m);
-			m->state = MBR_LEAVING;
-			return;
-		}
-		/* Otherwise deliver already received WITHDRAW event */
-		ehdr = buf_msg(m->event_msg);
-		msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
-		__skb_queue_tail(inputq, m->event_msg);
+		tipc_group_open(m, usr_wakeup);
+		tipc_group_decr_active(grp, m);
+		m->state = MBR_LEAVING;
+		tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+					m->bc_syncpt, inputq);
 		return;
 	case GRP_ADV_MSG:
 		if (!m)
 			return;
 		m->window += msg_adv_win(hdr);
-		*usr_wakeup = m->usr_pending;
-		m->usr_pending = false;
-		list_del_init(&m->congested);
+		tipc_group_open(m, usr_wakeup);
 		return;
 	case GRP_ACK_MSG:
 		if (!m)
 			return;
 		m->bc_acked = msg_grp_bc_acked(hdr);
 		if (--grp->bc_ackers)
-			break;
+			return;
+		list_del_init(&m->small_win);
+		*m->group->open = true;
 		*usr_wakeup = true;
-		m->usr_pending = false;
+		tipc_group_update_member(m, 0);
 		return;
 	case GRP_RECLAIM_MSG:
 		if (!m)
 			return;
-		*usr_wakeup = m->usr_pending;
-		m->usr_pending = false;
 		tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
 		m->window = ADV_IDLE;
+		tipc_group_open(m, usr_wakeup);
 		return;
 	case GRP_REMIT_MSG:
 		if (!m || m->state != MBR_RECLAIMING)
@@ -763,18 +821,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		m->advertised = ADV_IDLE + in_flight;
 		return;
 	}
-	/* All messages preceding the REMIT have been read */
-	if (m->advertised <= remitted) {
-		m->state = MBR_JOINED;
-		in_flight = 0;
-	}
-	/* ..and the REMIT overtaken by more messages => re-advertise */
+	/* This should never happen */
 	if (m->advertised < remitted)
-		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+		pr_warn_ratelimited("Unexpected REMIT msg\n");
 
-	m->advertised = ADV_IDLE + in_flight;
+	/* All messages preceding the REMIT have been read */
+	m->state = MBR_JOINED;
 	grp->active_cnt--;
-	list_del_init(&m->list);
+	m->advertised = ADV_IDLE;
 
 	/* Set oldest pending member to active and advertise */
 	if (list_empty(&grp->pending))
@@ -796,11 +850,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 void tipc_group_member_evt(struct tipc_group *grp,
 			   bool *usr_wakeup,
 			   int *sk_rcvbuf,
-			   struct sk_buff *skb,
+			   struct tipc_msg *hdr,
 			   struct sk_buff_head *inputq,
 			   struct sk_buff_head *xmitq)
 {
-	struct tipc_msg *hdr = buf_msg(skb);
 	struct tipc_event *evt = (void *)msg_data(hdr);
 	u32 instance = evt->found_lower;
 	u32 node = evt->port.node;
@@ -808,89 +861,59 @@ void tipc_group_member_evt(struct tipc_group *grp,
 	int event = evt->event;
 	struct tipc_member *m;
 	struct net *net;
-	bool node_up;
 	u32 self;
 
 	if (!grp)
-		goto drop;
+		return;
 
 	net = grp->net;
 	self = tipc_own_addr(net);
 	if (!grp->loopback && node == self && port == grp->portid)
-		goto drop;
-
-	/* Convert message before delivery to user */
-	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
-	msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
-	msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
-	msg_set_origport(hdr, port);
-	msg_set_orignode(hdr, node);
-	msg_set_nametype(hdr, grp->type);
-	msg_set_grp_evt(hdr, event);
+		return;
 
 	m = tipc_group_find_member(grp, node, port);
 
-	if (event == TIPC_PUBLISHED) {
-		if (!m)
-			m = tipc_group_create_member(grp, node, port,
-						     MBR_DISCOVERED);
-		if (!m)
-			goto drop;
-
-		/* Hold back event if JOIN message not yet received */
-		if (m->state == MBR_DISCOVERED) {
-			m->event_msg = skb;
-			m->state = MBR_PUBLISHED;
-		} else {
-			msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-			__skb_queue_tail(inputq, skb);
-			m->state = MBR_JOINED;
-			*usr_wakeup = true;
-			m->usr_pending = false;
+	switch (event) {
+	case TIPC_PUBLISHED:
+		/* Send and wait for arrival of JOIN message if necessary */
+		if (!m) {
+			m = tipc_group_create_member(grp, node, port, instance,
+						     MBR_PUBLISHED);
+			if (!m)
+				break;
+			tipc_group_update_member(m, 0);
+			tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+			break;
 		}
+
+		if (m->state != MBR_JOINING)
+			break;
+
+		/* Member can be taken into service */
 		m->instance = instance;
-		TIPC_SKB_CB(skb)->orig_member = m->instance;
+		m->state = MBR_JOINED;
+		tipc_group_open(m, usr_wakeup);
+		tipc_group_update_member(m, 0);
 		tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
-		if (m->window < ADV_IDLE)
-			tipc_group_update_member(m, 0);
-		else
-			list_del_init(&m->congested);
-	} else if (event == TIPC_WITHDRAWN) {
+		tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+					m->bc_syncpt, inputq);
+		break;
+	case TIPC_WITHDRAWN:
 		if (!m)
-			goto drop;
-
-		TIPC_SKB_CB(skb)->orig_member = m->instance;
+			break;
 
-		*usr_wakeup = true;
-		m->usr_pending = false;
-		node_up = tipc_node_is_up(net, node);
-		m->event_msg = NULL;
-
-		if (node_up) {
-			/* Hold back event if a LEAVE msg should be expected */
-			if (m->state != MBR_LEAVING) {
-				m->event_msg = skb;
-				tipc_group_decr_active(grp, m);
-				m->state = MBR_LEAVING;
-			} else {
-				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-				__skb_queue_tail(inputq, skb);
-			}
-		} else {
-			if (m->state != MBR_LEAVING) {
-				tipc_group_decr_active(grp, m);
-				m->state = MBR_LEAVING;
-				msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
-			} else {
-				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-			}
-			__skb_queue_tail(inputq, skb);
-		}
+		tipc_group_decr_active(grp, m);
+		m->state = MBR_LEAVING;
 		list_del_init(&m->list);
-		list_del_init(&m->congested);
+		tipc_group_open(m, usr_wakeup);
+
+		/* Only send event if no LEAVE message can be expected */
+		if (!tipc_node_is_up(net, node))
+			tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+						m->bc_rcv_nxt, inputq);
+		break;
+	default:
+		break;
 	}
 	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
-	return;
-drop:
-	kfree_skb(skb);
 }
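The group.c rework above removes the held-back event_msg bookkeeping: instead of parking a socket buffer on the member and releasing it later, both tipc_group_proto_rcv() and tipc_group_member_evt() synthesize the membership event the moment the member becomes servable, via tipc_group_create_event(), and funnel all wakeup state through tipc_group_open(). A minimal user-space sketch of the resulting two-input JOIN handshake (the state names mirror the diff; everything else is illustrative, not kernel code):

#include <stdio.h>

/* Member states as used in the reworked group.c */
enum mbr_state { MBR_JOINING, MBR_PUBLISHED, MBR_JOINED };

struct member {
	enum mbr_state state;
	int events;	/* stands in for tipc_group_create_event() */
};

/* A member is taken into service only once both inputs have arrived,
 * in either order: the JOIN protocol message and the PUBLISH event.
 */
static void on_join_msg(struct member *m)
{
	if (m->state != MBR_PUBLISHED)
		return;			/* still waiting for PUBLISH */
	m->state = MBR_JOINED;
	m->events++;
}

static void on_publish_evt(struct member *m)
{
	if (m->state != MBR_JOINING)
		return;			/* still waiting for JOIN */
	m->state = MBR_JOINED;
	m->events++;
}

int main(void)
{
	struct member m = { MBR_JOINING, 0 };

	on_join_msg(&m);		/* too early, no effect */
	on_publish_evt(&m);		/* PUBLISH completes the pair */
	printf("state=%d events=%d\n", m.state, m.events);
	return 0;
}

Whichever input arrives second completes the handshake; there is no longer any buffer to hold back or free on the error paths, which is why the drop: label disappears.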
diff --git a/net/tipc/group.h b/net/tipc/group.h
index d525e1cd7de5..5996af6e9f1d 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -43,9 +43,12 @@ struct tipc_member;
 struct tipc_msg;
 
 struct tipc_group *tipc_group_create(struct net *net, u32 portid,
-				     struct tipc_group_req *mreq);
+				     struct tipc_group_req *mreq,
+				     bool *group_is_open);
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf);
 void tipc_group_delete(struct net *net, struct tipc_group *grp);
-void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+			   u32 port, u32 instance);
 struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
 void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
 		     int *scope);
@@ -54,7 +57,7 @@ void tipc_group_filter_msg(struct tipc_group *grp,
 			   struct sk_buff_head *inputq,
 			   struct sk_buff_head *xmitq);
 void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
-			   int *sk_rcvbuf, struct sk_buff *skb,
+			   int *sk_rcvbuf, struct tipc_msg *hdr,
 			   struct sk_buff_head *inputq,
 			   struct sk_buff_head *xmitq);
 void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
@@ -69,5 +72,4 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			       u32 port, struct sk_buff_head *xmitq);
 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
 void tipc_group_update_member(struct tipc_member *m, int len);
-int tipc_group_size(struct tipc_group *grp);
 #endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6bce0b1117bd..2d6b2aed30e0 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -483,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 /**
  * tipc_link_bc_create - create new link to be used for broadcast
  * @n: pointer to associated node
- * @mtu: mtu to be used
+ * @mtu: mtu to be used initially if no peers
  * @window: send window to be used
  * @inputq: queue to put messages ready for delivery
  * @namedq: queue to put binding table update messages ready for delivery
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b0d07b35909d..4e1c6f6450bb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -208,8 +208,8 @@ bool tipc_msg_validate(struct sk_buff **_skb)
 	int msz, hsz;
 
 	/* Ensure that flow control ratio condition is satisfied */
-	if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
-		skb = skb_copy(skb, GFP_ATOMIC);
+	if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) {
+		skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC);
 		if (!skb)
 			return false;
 		kfree_skb(*_skb);
@@ -251,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff **_skb)
  * @pktmax: Max packet size that can be used
  * @list: Buffer or chain of buffers to be returned to caller
  *
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down.
+ *
  * Returns message data size or errno: -ENOMEM, -EFAULT
  */
-int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
-		   int offset, int dsz, int pktmax, struct sk_buff_head *list)
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+		   int dsz, int pktmax, struct sk_buff_head *list)
 {
 	int mhsz = msg_hdr_sz(mhdr);
+	struct tipc_msg pkthdr;
 	int msz = mhsz + dsz;
-	int pktno = 1;
-	int pktsz;
 	int pktrem = pktmax;
-	int drem = dsz;
-	struct tipc_msg pkthdr;
 	struct sk_buff *skb;
+	int drem = dsz;
+	int pktno = 1;
 	char *pktpos;
+	int pktsz;
 	int rc;
 
 	msg_set_size(mhdr, msz);
@@ -272,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 	/* No fragmentation needed? */
 	if (likely(msz <= pktmax)) {
 		skb = tipc_buf_acquire(msz, GFP_KERNEL);
-		if (unlikely(!skb))
+
+		/* Fall back to smaller MTU if node local message */
+		if (unlikely(!skb)) {
+			if (pktmax != MAX_MSG_SIZE)
+				return -ENOMEM;
+			rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+			if (rc != dsz)
+				return rc;
+			if (tipc_msg_assemble(list))
+				return dsz;
 			return -ENOMEM;
+		}
 		skb_orphan(skb);
 		__skb_queue_tail(list, skb);
 		skb_copy_to_linear_data(skb, mhdr, mhsz);
@@ -589,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	return true;
 }
 
+/* tipc_msg_assemble() - assemble chain of fragments into one message
+ */
+bool tipc_msg_assemble(struct sk_buff_head *list)
+{
+	struct sk_buff *skb, *tmp = NULL;
+
+	if (skb_queue_len(list) == 1)
+		return true;
+
+	while ((skb = __skb_dequeue(list))) {
+		skb->next = NULL;
+		if (tipc_buf_append(&tmp, &skb)) {
+			__skb_queue_tail(list, skb);
+			return true;
+		}
+		if (!tmp)
+			break;
+	}
+	__skb_queue_purge(list);
+	__skb_queue_head_init(list);
+	pr_warn("Failed to assemble buffer\n");
+	return false;
+}
+
 /* tipc_msg_reassemble() - clone a buffer chain of fragments and
  * reassemble the clones into one message
  */
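The tipc_msg_build() change above adds a one-shot fallback: if the single large linear allocation fails for a node-local message (pktmax == MAX_MSG_SIZE), the message is rebuilt as FB_MTU-sized fragments and immediately stitched back together by the new tipc_msg_assemble(). A compilable user-space sketch of just that control flow; alloc_big(), build_frags() and assemble() are stand-ins, not kernel APIs:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_MSG  66000	/* stand-in for MAX_MSG_SIZE */
#define FB_FRAG   3744	/* stand-in for FB_MTU */

static void *alloc_big(size_t sz)	/* fails for large requests */
{
	return sz > 16384 ? NULL : malloc(sz);
}

static int build_frags(size_t sz) { return (int)sz; }	/* always succeeds */
static bool assemble(void) { return true; }

/* Mirrors the patched flow: fall back exactly once, then reassemble */
static int build(size_t dsz, size_t pktmax)
{
	void *buf = alloc_big(dsz);

	if (!buf) {
		if (pktmax != MAX_MSG)
			return -1;		/* -ENOMEM in the kernel */
		if (build_frags(dsz) != (int)dsz)
			return -1;
		return assemble() ? (int)dsz : -1;
	}
	free(buf);
	return (int)dsz;
}

int main(void)
{
	printf("built %d bytes via fallback\n", build(60000, MAX_MSG));
	return 0;
}

The pktmax != MAX_MSG_SIZE guard is what makes the recursion safe: the inner call runs with FB_MTU and can therefore never recurse again, matching the comment added to the kernel-doc above.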
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 3e4384c222f7..b4ba1b4f9ae7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -98,7 +98,7 @@ struct plist;
 #define MAX_H_SIZE                60	/* Largest possible TIPC header size */
 
 #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-
+#define FB_MTU                  3744
 #define TIPC_MEDIA_INFO_OFFSET	5
 
 struct tipc_skb_cb {
@@ -943,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
 int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		   int offset, int dsz, int mtu, struct sk_buff_head *list);
 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
+bool tipc_msg_assemble(struct sk_buff_head *list);
 bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
 bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
 			struct sk_buff_head *cpy);
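For scale: a message at TIPC's maximum user payload needs one contiguous buffer of roughly 66 KB (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE = 60 + 66000 bytes), an allocation that can fail on a fragmented heap. Each FB_MTU fallback fragment instead needs at most 3744 bytes, of which 3744 - 60 = 3684 bytes carry user data once the 60-byte maximum header is deducted. The specific value 3744 is presumably chosen so that a fragment plus its socket and link-layer headroom still fits in a small, reliably available allocation.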
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index b3829bcf63c7..ed0457cc99d6 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -328,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
 		tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
 					    TIPC_PUBLISHED, publ->ref,
-					    publ->node, created_subseq);
+					    publ->node, publ->scope,
+					    created_subseq);
 	}
 	return publ;
 }
@@ -398,19 +399,21 @@ found:
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
 		tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
 					    TIPC_WITHDRAWN, publ->ref,
-					    publ->node, removed_subseq);
+					    publ->node, publ->scope,
+					    removed_subseq);
 	}
 
 	return publ;
 }
 
 /**
- * tipc_nameseq_subscribe - attach a subscription, and issue
- * the prescribed number of events if there is any sub-
+ * tipc_nameseq_subscribe - attach a subscription, and optionally
+ * issue the prescribed number of events if there is any sub-
  * sequence overlapping with the requested sequence
  */
 static void tipc_nameseq_subscribe(struct name_seq *nseq,
-				   struct tipc_subscription *s)
+				   struct tipc_subscription *s,
+				   bool status)
 {
 	struct sub_seq *sseq = nseq->sseqs;
 	struct tipc_name_seq ns;
@@ -420,7 +423,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 	tipc_subscrp_get(s);
 	list_add(&s->nameseq_list, &nseq->subscriptions);
 
-	if (!sseq)
+	if (!status || !sseq)
 		return;
 
 	while (sseq != &nseq->sseqs[nseq->first_free]) {
@@ -434,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 						    sseq->upper,
 						    TIPC_PUBLISHED,
 						    crs->ref, crs->node,
+						    crs->scope,
 						    must_report);
 			must_report = 0;
 		}
@@ -597,7 +601,7 @@ not_found:
 	return ref;
 }
 
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 			 struct list_head *dsts, int *dstcnt, u32 exclude,
 			 bool all)
 {
@@ -607,9 +611,6 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
 	struct name_seq *seq;
 	struct sub_seq *sseq;
 
-	if (!tipc_in_scope(domain, self))
-		return false;
-
 	*dstcnt = 0;
 	rcu_read_lock();
 	seq = nametbl_find_seq(net, type);
@@ -620,7 +621,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
 	if (likely(sseq)) {
 		info = sseq->info;
 		list_for_each_entry(publ, &info->zone_list, zone_list) {
-			if (!tipc_in_scope(domain, publ->node))
+			if (publ->scope != scope)
 				continue;
 			if (publ->ref == exclude && publ->node == self)
 				continue;
@@ -638,13 +639,14 @@ exit:
 	return !list_empty(dsts);
 }
 
-int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
-			      u32 limit, struct list_head *dports)
+int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+			   u32 scope, bool exact, struct list_head *dports)
 {
-	struct name_seq *seq;
-	struct sub_seq *sseq;
 	struct sub_seq *sseq_stop;
 	struct name_info *info;
+	struct publication *p;
+	struct name_seq *seq;
+	struct sub_seq *sseq;
 	int res = 0;
 
 	rcu_read_lock();
@@ -656,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
 	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
 	sseq_stop = seq->sseqs + seq->first_free;
 	for (; sseq != sseq_stop; sseq++) {
-		struct publication *publ;
-
 		if (sseq->lower > upper)
 			break;
-
 		info = sseq->info;
-		list_for_each_entry(publ, &info->node_list, node_list) {
-			if (publ->scope <= limit)
-				tipc_dest_push(dports, 0, publ->ref);
+		list_for_each_entry(p, &info->node_list, node_list) {
+			if (p->scope == scope || (!exact && p->scope < scope))
+				tipc_dest_push(dports, 0, p->ref);
 		}
 
 		if (info->cluster_list_size != info->node_list_size)
@@ -681,8 +680,7 @@ exit:
  * - Determines if any node local ports overlap
  */
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
-				   u32 upper, u32 domain,
-				   struct tipc_nlist *nodes)
+				   u32 upper, struct tipc_nlist *nodes)
 {
 	struct sub_seq *sseq, *stop;
 	struct publication *publ;
@@ -700,8 +698,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 	for (; sseq != stop && sseq->lower <= upper; sseq++) {
 		info = sseq->info;
 		list_for_each_entry(publ, &info->zone_list, zone_list) {
-			if (tipc_in_scope(domain, publ->node))
-				tipc_nlist_add(nodes, publ->node);
+			tipc_nlist_add(nodes, publ->node);
 		}
 	}
 	spin_unlock_bh(&seq->lock);
@@ -712,7 +709,7 @@ exit:
 /* tipc_nametbl_build_group - build list of communication group members
  */
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
-			      u32 type, u32 domain)
+			      u32 type, u32 scope)
 {
 	struct sub_seq *sseq, *stop;
 	struct name_info *info;
@@ -730,9 +727,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	for (; sseq != stop; sseq++) {
 		info = sseq->info;
 		list_for_each_entry(p, &info->zone_list, zone_list) {
-			if (!tipc_in_scope(domain, p->node))
+			if (p->scope != scope)
 				continue;
-			tipc_group_add_member(grp, p->node, p->ref);
+			tipc_group_add_member(grp, p->node, p->ref, p->lower);
 		}
 	}
 	spin_unlock_bh(&seq->lock);
@@ -811,7 +808,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
 /**
  * tipc_nametbl_subscribe - add a subscription object to the name table
  */
-void tipc_nametbl_subscribe(struct tipc_subscription *s)
+void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
 {
 	struct tipc_net *tn = net_generic(s->net, tipc_net_id);
 	u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
@@ -825,7 +822,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
 		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
 	if (seq) {
 		spin_lock_bh(&seq->lock);
-		tipc_nameseq_subscribe(seq, s);
+		tipc_nameseq_subscribe(seq, s, status);
 		spin_unlock_bh(&seq->lock);
 	} else {
 		tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
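The name-table changes above replace network-domain matching (tipc_in_scope() against the publisher's node address) with matching on the publication's own binding scope, and tipc_nametbl_mc_lookup() gains an exact flag: group traffic must hit exactly the scope it was published with, while ordinary multicast also accepts wider scopes. The whole filter reduces to a one-line predicate; the sketch below is illustrative, using the standard TIPC scope ordering (lower value = wider scope):

#include <stdbool.h>
#include <stdio.h>

enum { TIPC_ZONE_SCOPE = 1, TIPC_CLUSTER_SCOPE = 2, TIPC_NODE_SCOPE = 3 };

/* The filter from the patched tipc_nametbl_mc_lookup() loop */
static bool scope_match(int publ_scope, int wanted, bool exact)
{
	return publ_scope == wanted || (!exact && publ_scope < wanted);
}

int main(void)
{
	/* group multicast: a cluster-scope binding must not match node scope */
	printf("%d\n", scope_match(TIPC_CLUSTER_SCOPE, TIPC_NODE_SCOPE, true));
	/* plain multicast from the own node: wider bindings still match */
	printf("%d\n", scope_match(TIPC_CLUSTER_SCOPE, TIPC_NODE_SCOPE, false));
	return 0;
}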
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 71926e429446..f56e7cb3d436 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -100,13 +100,12 @@ struct name_table {
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
-			      u32 limit, struct list_head *dports);
+int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+			   u32 scope, bool exact, struct list_head *dports);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 			      u32 type, u32 domain);
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
-				   u32 upper, u32 domain,
-				   struct tipc_nlist *nodes);
+				   u32 upper, struct tipc_nlist *nodes);
 bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
 			 struct list_head *dsts, int *dstcnt, u32 exclude,
 			 bool all);
@@ -121,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 					     u32 lower, u32 node, u32 ref,
 					     u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s);
+void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
 void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index d60c30342327..df0c563c90cd 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -132,10 +132,11 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
 
 	spin_lock_bh(&s->idr_lock);
 	con = idr_find(&s->conn_idr, conid);
-	if (con && test_bit(CF_CONNECTED, &con->flags))
-		conn_get(con);
-	else
-		con = NULL;
+	if (con) {
+		if (!test_bit(CF_CONNECTED, &con->flags) ||
+		    !kref_get_unless_zero(&con->kref))
+			con = NULL;
+	}
 	spin_unlock_bh(&s->idr_lock);
 	return con;
 }
@@ -183,35 +184,28 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void tipc_unregister_callbacks(struct tipc_conn *con)
-{
-	struct sock *sk = con->sock->sk;
-
-	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_user_data = NULL;
-	write_unlock_bh(&sk->sk_callback_lock);
-}
-
 static void tipc_close_conn(struct tipc_conn *con)
 {
 	struct tipc_server *s = con->server;
+	struct sock *sk = con->sock->sk;
+	bool disconnect = false;
 
-	if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
-		if (con->sock)
-			tipc_unregister_callbacks(con);
-
+	write_lock_bh(&sk->sk_callback_lock);
+	disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+	if (disconnect) {
+		sk->sk_user_data = NULL;
 		if (con->conid)
 			s->tipc_conn_release(con->conid, con->usr_data);
-
-		/* We shouldn't flush pending works as we may be in the
-		 * thread. In fact the races with pending rx/tx work structs
-		 * are harmless for us here as we have already deleted this
-		 * connection from server connection list.
-		 */
-		if (con->sock)
-			kernel_sock_shutdown(con->sock, SHUT_RDWR);
-		conn_put(con);
 	}
+	write_unlock_bh(&sk->sk_callback_lock);
+
+	/* Handle concurrent calls from sending and receiving threads */
+	if (!disconnect)
+		return;
+
+	/* Don't flush pending works, -just let them expire */
+	kernel_sock_shutdown(con->sock, SHUT_RDWR);
+	conn_put(con);
 }
 
 static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
@@ -248,9 +242,10 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
 
 static int tipc_receive_from_sock(struct tipc_conn *con)
 {
-	struct msghdr msg = {};
 	struct tipc_server *s = con->server;
+	struct sock *sk = con->sock->sk;
 	struct sockaddr_tipc addr;
+	struct msghdr msg = {};
 	struct kvec iov;
 	void *buf;
 	int ret;
@@ -264,19 +259,22 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
 	iov.iov_base = buf;
 	iov.iov_len = s->max_rcvbuf_size;
 	msg.msg_name = &addr;
-	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
-			     MSG_DONTWAIT);
+	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
 	if (ret <= 0) {
 		kmem_cache_free(s->rcvbuf_cache, buf);
 		goto out_close;
 	}
 
-	s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr,
-			     con->usr_data, buf, ret);
-
+	read_lock_bh(&sk->sk_callback_lock);
+	if (test_bit(CF_CONNECTED, &con->flags))
+		ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
+					   &addr, con->usr_data, buf, ret);
+	read_unlock_bh(&sk->sk_callback_lock);
 	kmem_cache_free(s->rcvbuf_cache, buf);
-
-	return 0;
+	if (ret < 0)
+		tipc_conn_terminate(s, con->conid);
+	return ret;
 
 out_close:
 	if (ret != -EWOULDBLOCK)
@@ -489,8 +487,8 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
 	}
 }
 
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
-			     u32 lower, u32 upper, int *conid)
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+			     u32 upper, u32 filter, int *conid)
 {
 	struct tipc_subscriber *scbr;
 	struct tipc_subscr sub;
@@ -501,7 +499,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
 	sub.seq.lower = lower;
 	sub.seq.upper = upper;
 	sub.timeout = TIPC_WAIT_FOREVER;
-	sub.filter = TIPC_SUB_PORTS;
+	sub.filter = filter;
 	*(u32 *)&sub.usr_handle = port;
 
 	con = tipc_alloc_conn(tipc_topsrv(net));
@@ -525,11 +523,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
 void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
 {
 	struct tipc_conn *con;
+	struct tipc_server *srv;
 
 	con = tipc_conn_lookup(tipc_topsrv(net), conid);
 	if (!con)
 		return;
-	tipc_close_conn(con);
+
+	test_and_clear_bit(CF_CONNECTED, &con->flags);
+	srv = con->server;
+	if (con->conid)
+		srv->tipc_conn_release(con->conid, con->usr_data);
+	conn_put(con);
 	conn_put(con);
 }
 
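Two related races are closed in server.c: tipc_conn_lookup() only hands out a connection whose refcount is provably non-zero (kref_get_unless_zero() instead of an unconditional conn_get()), and tipc_close_conn() clears sk_user_data under sk_callback_lock so the receive path cannot run against a half-torn-down connection. The lookup half of the pattern, reduced to portable C; a plain counter stands in for the atomic kref:

#include <stdbool.h>
#include <stdio.h>

struct obj { int refs; bool connected; };

/* kref_get_unless_zero() analogue: never resurrect a dying object */
static bool get_unless_zero(struct obj *o)
{
	if (o->refs == 0)
		return false;
	o->refs++;	/* atomic in the kernel, single-threaded here */
	return true;
}

static struct obj *lookup(struct obj *o)
{
	/* the real code holds s->idr_lock around this test */
	if (!o || !o->connected || !get_unless_zero(o))
		return NULL;
	return o;
}

int main(void)
{
	struct obj dying = { 0, true };
	struct obj live = { 1, true };

	printf("dying: %p\n", (void *)lookup(&dying));	/* (nil) */
	printf("live:  %p\n", (void *)lookup(&live));
	return 0;
}

Note the two conn_put() calls in tipc_topsrv_kern_unsubscr() above: one drops the reference taken by the lookup, the other the connection's own reference, since the function now tears the connection down itself instead of calling tipc_close_conn().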
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 2113c9192633..64df7513cd70 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -41,6 +41,9 @@
 #include <net/net_namespace.h>
 
 #define TIPC_SERVER_NAME_LEN	32
+#define TIPC_SUB_CLUSTER_SCOPE  0x20
+#define TIPC_SUB_NODE_SCOPE     0x40
+#define TIPC_SUB_NO_STATUS      0x80
 
 /**
  * struct tipc_server - TIPC server structure
@@ -71,9 +74,9 @@ struct tipc_server {
 	int max_rcvbuf_size;
 	void *(*tipc_conn_new)(int conid);
 	void (*tipc_conn_release)(int conid, void *usr_data);
-	void (*tipc_conn_recvmsg)(struct net *net, int conid,
-				  struct sockaddr_tipc *addr, void *usr_data,
-				  void *buf, size_t len);
+	int (*tipc_conn_recvmsg)(struct net *net, int conid,
+				 struct sockaddr_tipc *addr, void *usr_data,
+				 void *buf, size_t len);
 	struct sockaddr_tipc *saddr;
 	char name[TIPC_SERVER_NAME_LEN];
 	int imp;
@@ -83,8 +86,8 @@ struct tipc_server {
 int tipc_conn_sendmsg(struct tipc_server *s, int conid,
 		      struct sockaddr_tipc *addr, void *data, size_t len);
 
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
-			     u32 lower, u32 upper, int *conid);
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+			     u32 upper, u32 filter, int *conid);
 void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
 
 /**
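The three new filter bits let a subscriber narrow what the topology server reports: TIPC_SUB_CLUSTER_SCOPE and TIPC_SUB_NODE_SCOPE select publications by their binding scope, and TIPC_SUB_NO_STATUS suppresses the initial dump of already-existing matches. How a kernel-internal caller might combine them with the widened tipc_topsrv_kern_subscr() signature — a hypothetical call site, not one from this patch:

	/* Hypothetical caller: node-scope events only, no initial status dump */
	u32 filter = TIPC_SUB_NODE_SCOPE | TIPC_SUB_NO_STATUS;
	int conid;

	if (!tipc_topsrv_kern_subscr(net, port, type, lower, upper,
				     filter, &conid))
		pr_warn("subscription failed\n");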
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3b4084480377..b0323ec7971e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -116,6 +116,7 @@ struct tipc_sock {
 	struct tipc_mc_method mc_method;
 	struct rcu_head rcu;
 	struct tipc_group *group;
+	bool group_is_open;
 };
 
 static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -710,43 +711,41 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
  * imply that the operation will succeed, merely that it should be performed
  * and will not block.
  */
-static unsigned int tipc_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
+static __poll_t tipc_poll(struct file *file, struct socket *sock,
+			  poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_group *grp = tsk->group;
-	u32 revents = 0;
+	__poll_t revents = 0;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		revents |= POLLRDHUP | POLLIN | POLLRDNORM;
+		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		revents |= POLLHUP;
+		revents |= EPOLLHUP;
 
 	switch (sk->sk_state) {
 	case TIPC_ESTABLISHED:
 	case TIPC_CONNECTING:
 		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
-			revents |= POLLOUT;
+			revents |= EPOLLOUT;
 		/* fall thru' */
 	case TIPC_LISTEN:
 		if (!skb_queue_empty(&sk->sk_receive_queue))
-			revents |= POLLIN | POLLRDNORM;
+			revents |= EPOLLIN | EPOLLRDNORM;
 		break;
 	case TIPC_OPEN:
-		if (!grp || tipc_group_size(grp))
-			if (!tsk->cong_link_cnt)
-				revents |= POLLOUT;
+		if (tsk->group_is_open && !tsk->cong_link_cnt)
+			revents |= EPOLLOUT;
 		if (!tipc_sk_type_connectionless(sk))
 			break;
 		if (skb_queue_empty(&sk->sk_receive_queue))
 			break;
-		revents |= POLLIN | POLLRDNORM;
+		revents |= EPOLLIN | EPOLLRDNORM;
 		break;
 	case TIPC_DISCONNECTING:
-		revents = POLLIN | POLLRDNORM | POLLHUP;
+		revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
 		break;
 	}
 	return revents;
@@ -772,7 +771,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	struct net *net = sock_net(sk);
 	int mtu = tipc_bcast_get_mtu(net);
 	struct tipc_mc_method *method = &tsk->mc_method;
-	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
 	struct sk_buff_head pkts;
 	struct tipc_nlist dsts;
 	int rc;
@@ -788,7 +786,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	/* Lookup destination nodes */
 	tipc_nlist_init(&dsts, tipc_own_addr(net));
 	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
-				      seq->upper, domain, &dsts);
+				      seq->upper, &dsts);
 	if (!dsts.local && !dsts.remote)
 		return -EHOSTUNREACH;
 
@@ -928,21 +926,22 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 	struct list_head *cong_links = &tsk->cong_links;
 	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
 	struct tipc_group *grp = tsk->group;
+	struct tipc_msg *hdr = &tsk->phdr;
 	struct tipc_member *first = NULL;
 	struct tipc_member *mbr = NULL;
 	struct net *net = sock_net(sk);
 	u32 node, port, exclude;
-	u32 type, inst, domain;
 	struct list_head dsts;
+	u32 type, inst, scope;
 	int lookups = 0;
 	int dstcnt, rc;
 	bool cong;
 
 	INIT_LIST_HEAD(&dsts);
 
-	type = dest->addr.name.name.type;
+	type = msg_nametype(hdr);
 	inst = dest->addr.name.name.instance;
-	domain = addr_domain(net, dest->scope);
+	scope = msg_lookup_scope(hdr);
 	exclude = tipc_group_exclude(grp);
 
 	while (++lookups < 4) {
@@ -950,7 +949,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 
 		/* Look for a non-congested destination member, if any */
 		while (1) {
-			if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
 						 &dstcnt, exclude, false))
 				return -EHOSTUNREACH;
 			tipc_dest_pop(&dsts, &node, &port);
@@ -1079,22 +1078,23 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
 {
 	struct sock *sk = sock->sk;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
-	struct tipc_name_seq *seq = &dest->addr.nameseq;
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_group *grp = tsk->group;
+	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
-	u32 domain, exclude, dstcnt;
+	u32 type, inst, scope, exclude;
 	struct list_head dsts;
+	u32 dstcnt;
 
 	INIT_LIST_HEAD(&dsts);
 
-	if (seq->lower != seq->upper)
-		return -ENOTSUPP;
-
-	domain = addr_domain(net, dest->scope);
+	type = msg_nametype(hdr);
+	inst = dest->addr.name.name.instance;
+	scope = msg_lookup_scope(hdr);
 	exclude = tipc_group_exclude(grp);
-	if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
-				 &dsts, &dstcnt, exclude, true))
+
+	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
+				 &dstcnt, exclude, true))
 		return -EHOSTUNREACH;
 
 	if (dstcnt == 1) {
@@ -1116,24 +1116,29 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
 void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		       struct sk_buff_head *inputq)
 {
-	u32 scope = TIPC_CLUSTER_SCOPE;
 	u32 self = tipc_own_addr(net);
+	u32 type, lower, upper, scope;
 	struct sk_buff *skb, *_skb;
-	u32 lower = 0, upper = ~0;
-	struct sk_buff_head tmpq;
 	u32 portid, oport, onode;
+	struct sk_buff_head tmpq;
 	struct list_head dports;
-	struct tipc_msg *msg;
-	int user, mtyp, hsz;
+	struct tipc_msg *hdr;
+	int user, mtyp, hlen;
+	bool exact;
 
 	__skb_queue_head_init(&tmpq);
 	INIT_LIST_HEAD(&dports);
 
 	skb = tipc_skb_peek(arrvq, &inputq->lock);
 	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
-		msg = buf_msg(skb);
-		user = msg_user(msg);
-		mtyp = msg_type(msg);
+		hdr = buf_msg(skb);
+		user = msg_user(hdr);
+		mtyp = msg_type(hdr);
+		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
+		oport = msg_origport(hdr);
+		onode = msg_orignode(hdr);
+		type = msg_nametype(hdr);
+
 		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
 			spin_lock_bh(&inputq->lock);
 			if (skb_peek(arrvq) == skb) {
@@ -1144,21 +1149,31 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 			spin_unlock_bh(&inputq->lock);
 			continue;
 		}
-		hsz = skb_headroom(skb) + msg_hdr_sz(msg);
-		oport = msg_origport(msg);
-		onode = msg_orignode(msg);
-		if (onode == self)
-			scope = TIPC_NODE_SCOPE;
-
-		/* Create destination port list and message clones: */
-		if (!msg_in_group(msg)) {
-			lower = msg_namelower(msg);
-			upper = msg_nameupper(msg);
+
+		/* Group messages require exact scope match */
+		if (msg_in_group(hdr)) {
+			lower = 0;
+			upper = ~0;
+			scope = msg_lookup_scope(hdr);
+			exact = true;
+		} else {
+			/* TIPC_NODE_SCOPE means "any scope" in this context */
+			if (onode == self)
+				scope = TIPC_NODE_SCOPE;
+			else
+				scope = TIPC_CLUSTER_SCOPE;
+			exact = false;
+			lower = msg_namelower(hdr);
+			upper = msg_nameupper(hdr);
 		}
-		tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
-					  scope, &dports);
+
+		/* Create destination port list: */
+		tipc_nametbl_mc_lookup(net, type, lower, upper,
+				       scope, exact, &dports);
+
+		/* Clone message per destination */
 		while (tipc_dest_pop(&dports, NULL, &portid)) {
-			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
+			_skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
 			if (_skb) {
 				msg_set_destport(buf_msg(_skb), portid);
 				__skb_queue_tail(&tmpq, _skb);
@@ -1882,8 +1897,8 @@ static void tipc_write_space(struct sock *sk)
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
 	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-						POLLWRNORM | POLLWRBAND);
+		wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+						EPOLLWRNORM | EPOLLWRBAND);
 	rcu_read_unlock();
 }
 
@@ -1899,8 +1914,8 @@ static void tipc_data_ready(struct sock *sk)
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
 	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-						POLLRDNORM | POLLRDBAND);
+		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+						EPOLLRDNORM | EPOLLRDBAND);
 	rcu_read_unlock();
 }
 
@@ -1933,8 +1948,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
 		break;
 	case TOP_SRV:
 		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
-				      skb, inputq, xmitq);
-		skb = NULL;
+				      hdr, inputq, xmitq);
 		break;
 	default:
 		break;
@@ -2640,9 +2654,7 @@ void tipc_sk_reinit(struct net *net)
 	rhashtable_walk_enter(&tn->sk_rht, &iter);
 
 	do {
-		tsk = ERR_PTR(rhashtable_walk_start(&iter));
-		if (IS_ERR(tsk))
-			goto walk_stop;
+		rhashtable_walk_start(&iter);
 
 		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
 			spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2651,7 +2663,7 @@ void tipc_sk_reinit(struct net *net)
 			msg_set_orignode(msg, tn->own_addr);
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
-walk_stop:
+
 		rhashtable_walk_stop(&iter);
 	} while (tsk == ERR_PTR(-EAGAIN));
 }
@@ -2734,7 +2746,6 @@ void tipc_sk_rht_destroy(struct net *net)
 static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 {
 	struct net *net = sock_net(&tsk->sk);
-	u32 domain = addr_domain(net, mreq->scope);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct tipc_name_seq seq;
@@ -2742,9 +2753,11 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 
 	if (mreq->type < TIPC_RESERVED_TYPES)
 		return -EACCES;
+	if (mreq->scope > TIPC_NODE_SCOPE)
+		return -EINVAL;
 	if (grp)
 		return -EACCES;
-	grp = tipc_group_create(net, tsk->portid, mreq);
+	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
 	if (!grp)
 		return -ENOMEM;
 	tsk->group = grp;
@@ -2754,16 +2767,17 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 	seq.type = mreq->type;
 	seq.lower = mreq->instance;
 	seq.upper = seq.lower;
-	tipc_nametbl_build_group(net, grp, mreq->type, domain);
+	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
 	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
 	if (rc) {
 		tipc_group_delete(net, grp);
 		tsk->group = NULL;
+		return rc;
 	}
-
-	/* Eliminate any risk that a broadcast overtakes the sent JOIN */
+	/* Eliminate any risk that a broadcast overtakes sent JOINs */
 	tsk->mc_method.rcast = true;
 	tsk->mc_method.mandatory = true;
+	tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
 	return rc;
 }
 
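Besides the mechanical POLL* to EPOLL* conversion, tipc_poll() no longer needs the group pointer at all: writability of a group socket in TIPC_OPEN is a flag test on group_is_open, a boolean the group layer updates in place (note the *m->group->open = true in the GRP_ACK_MSG handling above, and the &tsk->group_is_open pointer passed to tipc_group_create()). The reduction, as a runnable sketch:

#include <stdbool.h>
#include <stdio.h>

#define SKETCH_EPOLLOUT 0x4u	/* value of EPOLLOUT on Linux */

/* The patched TIPC_OPEN branch of tipc_poll(), isolated */
static unsigned int poll_open(bool group_is_open, int cong_link_cnt)
{
	return (group_is_open && !cong_link_cnt) ? SKETCH_EPOLLOUT : 0;
}

int main(void)
{
	printf("%#x\n", poll_open(true, 0));	/* writable */
	printf("%#x\n", poll_open(true, 3));	/* blocked on congested links */
	printf("%#x\n", poll_open(false, 0));	/* group not yet open */
	return 0;
}

This turns a per-poll walk of group state (the old tipc_group_size() test) into a constant-time check, which is why tipc_group_size() can be dropped from group.h.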
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 251065dfd8df..68e26470c516 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -118,15 +118,19 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
 
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 				 u32 found_upper, u32 event, u32 port_ref,
-				 u32 node, int must)
+				 u32 node, u32 scope, int must)
 {
+	u32 filter = htohl(sub->evt.s.filter, sub->swap);
 	struct tipc_name_seq seq;
 
 	tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
 	if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
 		return;
-	if (!must &&
-	    !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS))
+	if (!must && !(filter & TIPC_SUB_PORTS))
+		return;
+	if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
+		return;
+	if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
 		return;
 
 	tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
@@ -285,21 +289,21 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
 	return sub;
 }
 
-static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
-				   struct tipc_subscriber *subscriber, int swap)
+static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
+				  struct tipc_subscriber *subscriber, int swap,
+				  bool status)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_subscription *sub = NULL;
 	u32 timeout;
 
 	sub = tipc_subscrp_create(net, s, swap);
 	if (!sub)
-		return tipc_conn_terminate(tn->topsrv, subscriber->conid);
+		return -1;
 
 	spin_lock_bh(&subscriber->lock);
 	list_add(&sub->subscrp_list, &subscriber->subscrp_list);
 	sub->subscriber = subscriber;
-	tipc_nametbl_subscribe(sub);
+	tipc_nametbl_subscribe(sub, status);
 	tipc_subscrb_get(subscriber);
 	spin_unlock_bh(&subscriber->lock);
 
@@ -308,6 +312,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
 
 	if (timeout != TIPC_WAIT_FOREVER)
 		mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
+	return 0;
 }
 
 /* Handle one termination request for the subscriber */
@@ -317,12 +322,13 @@ static void tipc_subscrb_release_cb(int conid, void *usr_data)
 }
 
 /* Handle one request to create a new subscription for the subscriber */
-static void tipc_subscrb_rcv_cb(struct net *net, int conid,
-				struct sockaddr_tipc *addr, void *usr_data,
-				void *buf, size_t len)
+static int tipc_subscrb_rcv_cb(struct net *net, int conid,
+			       struct sockaddr_tipc *addr, void *usr_data,
+			       void *buf, size_t len)
 {
 	struct tipc_subscriber *subscriber = usr_data;
 	struct tipc_subscr *s = (struct tipc_subscr *)buf;
+	bool status;
 	int swap;
 
 	/* Determine subscriber's endianness */
@@ -332,10 +338,11 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
 	/* Detect & process a subscription cancellation request */
 	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
 		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
-		return tipc_subscrp_cancel(s, subscriber);
+		tipc_subscrp_cancel(s, subscriber);
+		return 0;
 	}
-
-	tipc_subscrp_subscribe(net, s, subscriber, swap);
+	status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
+	return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
 }
 
 /* Handle one request to establish a new subscriber */
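On the server side, the callback chain now propagates errors (the recvmsg callback returns int, and a failed subscribe terminates the connection from tipc_receive_from_sock() rather than from deep inside subscr.c), while the status flag is simply the inverse of TIPC_SUB_NO_STATUS in the request. The request-decoding step in isolation, as a compilable sketch; htohl() is modelled here as a conditional byte swap, and the bit values are the uapi/patch definitions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TIPC_SUB_CANCEL    0x04
#define TIPC_SUB_NO_STATUS 0x80

static uint32_t htohl(uint32_t in, int swap)
{
	return swap ? __builtin_bswap32(in) : in;
}

/* Mirrors the patched tipc_subscrb_rcv_cb() decision points */
static int handle_request(uint32_t *filter, int swap)
{
	if (*filter & htohl(TIPC_SUB_CANCEL, swap)) {
		*filter &= ~htohl(TIPC_SUB_CANCEL, swap);
		return 0;			/* cancellation handled */
	}
	bool status = !(*filter & htohl(TIPC_SUB_NO_STATUS, swap));
	return status ? 1 : 2;			/* subscribe, +/- status dump */
}

int main(void)
{
	uint32_t f = TIPC_SUB_NO_STATUS;
	printf("%d\n", handle_request(&f, 0));	/* 2: no initial dump */
	return 0;
}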
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ee52957dc952..f3edca775d9f 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 			       u32 found_upper);
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
 				 u32 found_lower, u32 found_upper, u32 event,
-				 u32 port_ref, u32 node, int must);
+				 u32 port_ref, u32 node, u32 scope, int must);
 void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
 			      struct tipc_name_seq *out);
 u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 736719c8314e..b0d5fcea47e7 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -484,6 +484,8 @@ out:
 
 static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.name			= "tls",
+	.uid			= TCP_ULP_TLS,
+	.user_visible		= true,
 	.owner			= THIS_MODULE,
 	.init			= tls_init,
 };
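The two added fields register TLS as a user-visible upper-layer protocol with a fixed uid, which is what lets an application select it by name. Minimal user-space usage, assuming a connected TCP socket on a TLS-enabled kernel (a sketch; error handling trimmed):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_ULP
#define TCP_ULP 31	/* older libc headers may lack this */
#endif

/* Attach the "tls" ULP to an established TCP connection */
int attach_tls(int fd)
{
	return setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
}

After this succeeds, the kernel expects the usual TLS_TX crypto parameters via setsockopt(SOL_TLS, ...) before encrypted sends can begin.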
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 0a9b72fbd761..f26376e954ae 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -214,7 +214,11 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
214 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); 214 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
215 aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out, 215 aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
216 data_len, tls_ctx->iv); 216 data_len, tls_ctx->iv);
217 rc = crypto_aead_encrypt(aead_req); 217
218 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
219 crypto_req_done, &ctx->async_wait);
220
221 rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
218 222
219 ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size; 223 ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
220 ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size; 224 ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
@@ -665,6 +669,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
665 goto out; 669 goto out;
666 } 670 }
667 671
672 crypto_init_wait(&sw_ctx->async_wait);
673
668 ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; 674 ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
669 675
670 crypto_info = &ctx->crypto_send; 676 crypto_info = &ctx->crypto_send;
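The tls_sw.c hunks switch tls_do_encryption() from assuming a synchronous AEAD to the crypto_wait_req() idiom, so the cipher may also be an async (e.g. hardware-offloaded) implementation: a completion callback is registered and -EINPROGRESS/-EBUSY are converted into a sleep until the request finishes. The pattern in isolation, assuming the standard <linux/crypto.h> helpers and an already set up aead_request 'req':

struct crypto_wait wait;
int err;

crypto_init_wait(&wait);
/* crypto_req_done() completes 'wait' when the async request finishes. */
aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                          crypto_req_done, &wait);
/* Returns the final status; sleeps only if the cipher went asynchronous. */
err = crypto_wait_req(crypto_aead_encrypt(req), &wait);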
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a9ee634f3c42..d545e1d0dea2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -367,7 +367,7 @@ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int
367 /* relaying can only happen while the wq still exists */ 367 /* relaying can only happen while the wq still exists */
368 u_sleep = sk_sleep(&u->sk); 368 u_sleep = sk_sleep(&u->sk);
369 if (u_sleep) 369 if (u_sleep)
370 wake_up_interruptible_poll(u_sleep, key); 370 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
371 371
372 return 0; 372 return 0;
373} 373}
@@ -415,9 +415,9 @@ static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
415{ 415{
416 unix_dgram_peer_wake_disconnect(sk, other); 416 unix_dgram_peer_wake_disconnect(sk, other);
417 wake_up_interruptible_poll(sk_sleep(sk), 417 wake_up_interruptible_poll(sk_sleep(sk),
418 POLLOUT | 418 EPOLLOUT |
419 POLLWRNORM | 419 EPOLLWRNORM |
420 POLLWRBAND); 420 EPOLLWRBAND);
421} 421}
422 422
423/* preconditions: 423/* preconditions:
@@ -454,7 +454,7 @@ static void unix_write_space(struct sock *sk)
454 wq = rcu_dereference(sk->sk_wq); 454 wq = rcu_dereference(sk->sk_wq);
455 if (skwq_has_sleeper(wq)) 455 if (skwq_has_sleeper(wq))
456 wake_up_interruptible_sync_poll(&wq->wait, 456 wake_up_interruptible_sync_poll(&wq->wait,
457 POLLOUT | POLLWRNORM | POLLWRBAND); 457 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
458 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 458 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
459 } 459 }
460 rcu_read_unlock(); 460 rcu_read_unlock();
@@ -638,8 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
638static int unix_socketpair(struct socket *, struct socket *); 638static int unix_socketpair(struct socket *, struct socket *);
639static int unix_accept(struct socket *, struct socket *, int, bool); 639static int unix_accept(struct socket *, struct socket *, int, bool);
640static int unix_getname(struct socket *, struct sockaddr *, int *, int); 640static int unix_getname(struct socket *, struct sockaddr *, int *, int);
641static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 641static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
642static unsigned int unix_dgram_poll(struct file *, struct socket *, 642static __poll_t unix_dgram_poll(struct file *, struct socket *,
643 poll_table *); 643 poll_table *);
644static int unix_ioctl(struct socket *, unsigned int, unsigned long); 644static int unix_ioctl(struct socket *, unsigned int, unsigned long);
645static int unix_shutdown(struct socket *, int); 645static int unix_shutdown(struct socket *, int);
@@ -2129,8 +2129,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2129 2129
2130 if (wq_has_sleeper(&u->peer_wait)) 2130 if (wq_has_sleeper(&u->peer_wait))
2131 wake_up_interruptible_sync_poll(&u->peer_wait, 2131 wake_up_interruptible_sync_poll(&u->peer_wait,
2132 POLLOUT | POLLWRNORM | 2132 EPOLLOUT | EPOLLWRNORM |
2133 POLLWRBAND); 2133 EPOLLWRBAND);
2134 2134
2135 if (msg->msg_name) 2135 if (msg->msg_name)
2136 unix_copy_addr(msg, skb->sk); 2136 unix_copy_addr(msg, skb->sk);
@@ -2640,75 +2640,76 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2640 return err; 2640 return err;
2641} 2641}
2642 2642
2643static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) 2643static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2644{ 2644{
2645 struct sock *sk = sock->sk; 2645 struct sock *sk = sock->sk;
2646 unsigned int mask; 2646 __poll_t mask;
2647 2647
2648 sock_poll_wait(file, sk_sleep(sk), wait); 2648 sock_poll_wait(file, sk_sleep(sk), wait);
2649 mask = 0; 2649 mask = 0;
2650 2650
2651 /* exceptional events? */ 2651 /* exceptional events? */
2652 if (sk->sk_err) 2652 if (sk->sk_err)
2653 mask |= POLLERR; 2653 mask |= EPOLLERR;
2654 if (sk->sk_shutdown == SHUTDOWN_MASK) 2654 if (sk->sk_shutdown == SHUTDOWN_MASK)
2655 mask |= POLLHUP; 2655 mask |= EPOLLHUP;
2656 if (sk->sk_shutdown & RCV_SHUTDOWN) 2656 if (sk->sk_shutdown & RCV_SHUTDOWN)
2657 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2657 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2658 2658
2659 /* readable? */ 2659 /* readable? */
2660 if (!skb_queue_empty(&sk->sk_receive_queue)) 2660 if (!skb_queue_empty(&sk->sk_receive_queue))
2661 mask |= POLLIN | POLLRDNORM; 2661 mask |= EPOLLIN | EPOLLRDNORM;
2662 2662
2663 /* Connection-based need to check for termination and startup */ 2663 /* Connection-based need to check for termination and startup */
2664 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 2664 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2665 sk->sk_state == TCP_CLOSE) 2665 sk->sk_state == TCP_CLOSE)
2666 mask |= POLLHUP; 2666 mask |= EPOLLHUP;
2667 2667
2668 /* 2668 /*
2669 * we set writable also when the other side has shut down the 2669 * we set writable also when the other side has shut down the
2670 * connection. This prevents stuck sockets. 2670 * connection. This prevents stuck sockets.
2671 */ 2671 */
2672 if (unix_writable(sk)) 2672 if (unix_writable(sk))
2673 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2673 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2674 2674
2675 return mask; 2675 return mask;
2676} 2676}
2677 2677
2678static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, 2678static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2679 poll_table *wait) 2679 poll_table *wait)
2680{ 2680{
2681 struct sock *sk = sock->sk, *other; 2681 struct sock *sk = sock->sk, *other;
2682 unsigned int mask, writable; 2682 unsigned int writable;
2683 __poll_t mask;
2683 2684
2684 sock_poll_wait(file, sk_sleep(sk), wait); 2685 sock_poll_wait(file, sk_sleep(sk), wait);
2685 mask = 0; 2686 mask = 0;
2686 2687
2687 /* exceptional events? */ 2688 /* exceptional events? */
2688 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 2689 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2689 mask |= POLLERR | 2690 mask |= EPOLLERR |
2690 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); 2691 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2691 2692
2692 if (sk->sk_shutdown & RCV_SHUTDOWN) 2693 if (sk->sk_shutdown & RCV_SHUTDOWN)
2693 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2694 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2694 if (sk->sk_shutdown == SHUTDOWN_MASK) 2695 if (sk->sk_shutdown == SHUTDOWN_MASK)
2695 mask |= POLLHUP; 2696 mask |= EPOLLHUP;
2696 2697
2697 /* readable? */ 2698 /* readable? */
2698 if (!skb_queue_empty(&sk->sk_receive_queue)) 2699 if (!skb_queue_empty(&sk->sk_receive_queue))
2699 mask |= POLLIN | POLLRDNORM; 2700 mask |= EPOLLIN | EPOLLRDNORM;
2700 2701
2701 /* Connection-based need to check for termination and startup */ 2702 /* Connection-based need to check for termination and startup */
2702 if (sk->sk_type == SOCK_SEQPACKET) { 2703 if (sk->sk_type == SOCK_SEQPACKET) {
2703 if (sk->sk_state == TCP_CLOSE) 2704 if (sk->sk_state == TCP_CLOSE)
2704 mask |= POLLHUP; 2705 mask |= EPOLLHUP;
2705 /* connection hasn't started yet? */ 2706 /* connection hasn't started yet? */
2706 if (sk->sk_state == TCP_SYN_SENT) 2707 if (sk->sk_state == TCP_SYN_SENT)
2707 return mask; 2708 return mask;
2708 } 2709 }
2709 2710
2710 /* No write status requested, avoid expensive OUT tests. */ 2711 /* No write status requested, avoid expensive OUT tests. */
2711 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) 2712 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2712 return mask; 2713 return mask;
2713 2714
2714 writable = unix_writable(sk); 2715 writable = unix_writable(sk);
@@ -2725,7 +2726,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2725 } 2726 }
2726 2727
2727 if (writable) 2728 if (writable)
2728 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2729 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2729 else 2730 else
2730 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 2731 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2731 2732
@@ -2869,7 +2870,6 @@ static int unix_seq_open(struct inode *inode, struct file *file)
2869} 2870}
2870 2871
2871static const struct file_operations unix_seq_fops = { 2872static const struct file_operations unix_seq_fops = {
2872 .owner = THIS_MODULE,
2873 .open = unix_seq_open, 2873 .open = unix_seq_open,
2874 .read = seq_read, 2874 .read = seq_read,
2875 .llseek = seq_lseek, 2875 .llseek = seq_lseek,
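The af_unix.c hunks are part of the tree-wide poll conversion: poll methods now return the sparse-checked __poll_t type and build their masks from the EPOLL* constants, whose values are fixed across architectures (unlike some legacy POLL* values), and key_to_poll() recovers the mask from the opaque wait-queue key. Roughly what the new types amount to, with shapes assumed from <linux/types.h> and the eventpoll UAPI header:

typedef unsigned __bitwise __poll_t;    /* sparse flags mixing with plain ints */

#define EPOLLIN         (__force __poll_t)0x00000001
#define EPOLLOUT        (__force __poll_t)0x00000004

static inline __poll_t key_to_poll(void *key)
{
        return (__force __poll_t)(uintptr_t)key;
}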
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c9473d698525..e0fc84daed94 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,11 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
850 return err; 850 return err;
851} 851}
852 852
853static unsigned int vsock_poll(struct file *file, struct socket *sock, 853static __poll_t vsock_poll(struct file *file, struct socket *sock,
854 poll_table *wait) 854 poll_table *wait)
855{ 855{
856 struct sock *sk; 856 struct sock *sk;
857 unsigned int mask; 857 __poll_t mask;
858 struct vsock_sock *vsk; 858 struct vsock_sock *vsk;
859 859
860 sk = sock->sk; 860 sk = sock->sk;
@@ -865,20 +865,20 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
865 865
866 if (sk->sk_err) 866 if (sk->sk_err)
867 /* Signify that there has been an error on this socket. */ 867 /* Signify that there has been an error on this socket. */
868 mask |= POLLERR; 868 mask |= EPOLLERR;
869 869
870 /* INET sockets treat local write shutdown and peer write shutdown as a 870 /* INET sockets treat local write shutdown and peer write shutdown as a
871 * case of POLLHUP set. 871 * case of EPOLLHUP set.
872 */ 872 */
873 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 873 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
874 ((sk->sk_shutdown & SEND_SHUTDOWN) && 874 ((sk->sk_shutdown & SEND_SHUTDOWN) &&
875 (vsk->peer_shutdown & SEND_SHUTDOWN))) { 875 (vsk->peer_shutdown & SEND_SHUTDOWN))) {
876 mask |= POLLHUP; 876 mask |= EPOLLHUP;
877 } 877 }
878 878
879 if (sk->sk_shutdown & RCV_SHUTDOWN || 879 if (sk->sk_shutdown & RCV_SHUTDOWN ||
880 vsk->peer_shutdown & SEND_SHUTDOWN) { 880 vsk->peer_shutdown & SEND_SHUTDOWN) {
881 mask |= POLLRDHUP; 881 mask |= EPOLLRDHUP;
882 } 882 }
883 883
884 if (sock->type == SOCK_DGRAM) { 884 if (sock->type == SOCK_DGRAM) {
@@ -888,11 +888,11 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
888 */ 888 */
889 if (!skb_queue_empty(&sk->sk_receive_queue) || 889 if (!skb_queue_empty(&sk->sk_receive_queue) ||
890 (sk->sk_shutdown & RCV_SHUTDOWN)) { 890 (sk->sk_shutdown & RCV_SHUTDOWN)) {
891 mask |= POLLIN | POLLRDNORM; 891 mask |= EPOLLIN | EPOLLRDNORM;
892 } 892 }
893 893
894 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) 894 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
895 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 895 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
896 896
897 } else if (sock->type == SOCK_STREAM) { 897 } else if (sock->type == SOCK_STREAM) {
898 lock_sock(sk); 898 lock_sock(sk);
@@ -902,7 +902,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
902 */ 902 */
903 if (sk->sk_state == TCP_LISTEN 903 if (sk->sk_state == TCP_LISTEN
904 && !vsock_is_accept_queue_empty(sk)) 904 && !vsock_is_accept_queue_empty(sk))
905 mask |= POLLIN | POLLRDNORM; 905 mask |= EPOLLIN | EPOLLRDNORM;
906 906
907 /* If there is something in the queue then we can read. */ 907 /* If there is something in the queue then we can read. */
908 if (transport->stream_is_active(vsk) && 908 if (transport->stream_is_active(vsk) &&
@@ -911,10 +911,10 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
911 int ret = transport->notify_poll_in( 911 int ret = transport->notify_poll_in(
912 vsk, 1, &data_ready_now); 912 vsk, 1, &data_ready_now);
913 if (ret < 0) { 913 if (ret < 0) {
914 mask |= POLLERR; 914 mask |= EPOLLERR;
915 } else { 915 } else {
916 if (data_ready_now) 916 if (data_ready_now)
917 mask |= POLLIN | POLLRDNORM; 917 mask |= EPOLLIN | EPOLLRDNORM;
918 918
919 } 919 }
920 } 920 }
@@ -925,7 +925,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
925 */ 925 */
926 if (sk->sk_shutdown & RCV_SHUTDOWN || 926 if (sk->sk_shutdown & RCV_SHUTDOWN ||
927 vsk->peer_shutdown & SEND_SHUTDOWN) { 927 vsk->peer_shutdown & SEND_SHUTDOWN) {
928 mask |= POLLIN | POLLRDNORM; 928 mask |= EPOLLIN | EPOLLRDNORM;
929 } 929 }
930 930
931 /* Connected sockets that can produce data can be written. */ 931 /* Connected sockets that can produce data can be written. */
@@ -935,25 +935,25 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
935 int ret = transport->notify_poll_out( 935 int ret = transport->notify_poll_out(
936 vsk, 1, &space_avail_now); 936 vsk, 1, &space_avail_now);
937 if (ret < 0) { 937 if (ret < 0) {
938 mask |= POLLERR; 938 mask |= EPOLLERR;
939 } else { 939 } else {
940 if (space_avail_now) 940 if (space_avail_now)
941 /* Remove POLLWRBAND since INET 941 /* Remove EPOLLWRBAND since INET
942 * sockets are not setting it. 942 * sockets are not setting it.
943 */ 943 */
944 mask |= POLLOUT | POLLWRNORM; 944 mask |= EPOLLOUT | EPOLLWRNORM;
945 945
946 } 946 }
947 } 947 }
948 } 948 }
949 949
950 /* Simulate INET socket poll behaviors, which sets 950 /* Simulate INET socket poll behaviors, which sets
951 * POLLOUT|POLLWRNORM when peer is closed and nothing to read, 951 * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to read,
952 * but local send is not shutdown. 952 * but local send is not shutdown.
953 */ 953 */
954 if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) { 954 if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
955 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) 955 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
956 mask |= POLLOUT | POLLWRNORM; 956 mask |= EPOLLOUT | EPOLLWRNORM;
957 957
958 } 958 }
959 959
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 413d4f4e6334..a1d10993d08a 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -126,6 +126,11 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
126 wdev->ibss_fixed = params->channel_fixed; 126 wdev->ibss_fixed = params->channel_fixed;
127 wdev->ibss_dfs_possible = params->userspace_handles_dfs; 127 wdev->ibss_dfs_possible = params->userspace_handles_dfs;
128 wdev->chandef = params->chandef; 128 wdev->chandef = params->chandef;
129 if (connkeys) {
130 params->wep_keys = connkeys->params;
131 params->wep_tx_key = connkeys->def;
132 }
133
129#ifdef CONFIG_CFG80211_WEXT 134#ifdef CONFIG_CFG80211_WEXT
130 wdev->wext.ibss.chandef = params->chandef; 135 wdev->wext.ibss.chandef = params->chandef;
131#endif 136#endif
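The ibss.c hunk forwards the WEP keys parsed from the join request into the driver-visible parameters, so drivers that program keys at join time can see them. The fields involved, as assumed here from the assignments above (a hedged sketch, not the full structures):

struct cfg80211_cached_keys {
        struct key_params params[4];    /* parsed static WEP keys */
        int def;                        /* index of the default/TX key */
        /* ... */
};

struct cfg80211_ibss_params {
        /* ... */
        struct key_params *wep_keys;    /* points at the cached key array */
        int wep_tx_key;                 /* default TX key index */
};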
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index e7c64a8dce54..bbb9907bfa86 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -692,7 +692,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
692 return rdev_mgmt_tx(rdev, wdev, params, cookie); 692 return rdev_mgmt_tx(rdev, wdev, params, cookie);
693} 693}
694 694
695bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, 695bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
696 const u8 *buf, size_t len, u32 flags) 696 const u8 *buf, size_t len, u32 flags)
697{ 697{
698 struct wiphy *wiphy = wdev->wiphy; 698 struct wiphy *wiphy = wdev->wiphy;
@@ -708,7 +708,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
708 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE); 708 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
709 u16 stype; 709 u16 stype;
710 710
711 trace_cfg80211_rx_mgmt(wdev, freq, sig_mbm); 711 trace_cfg80211_rx_mgmt(wdev, freq, sig_dbm);
712 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4; 712 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
713 713
714 if (!(stypes->rx & BIT(stype))) { 714 if (!(stypes->rx & BIT(stype))) {
@@ -735,7 +735,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
735 735
736 /* Indicate the received Action frame to user space */ 736 /* Indicate the received Action frame to user space */
737 if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, 737 if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
738 freq, sig_mbm, 738 freq, sig_dbm,
739 buf, len, flags, GFP_ATOMIC)) 739 buf, len, flags, GFP_ATOMIC))
740 continue; 740 continue;
741 741
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 542a4fc0a8d7..9c0dcc8324b0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -16,6 +16,7 @@
16#include <linux/nl80211.h> 16#include <linux/nl80211.h>
17#include <linux/rtnetlink.h> 17#include <linux/rtnetlink.h>
18#include <linux/netlink.h> 18#include <linux/netlink.h>
19#include <linux/nospec.h>
19#include <linux/etherdevice.h> 20#include <linux/etherdevice.h>
20#include <net/net_namespace.h> 21#include <net/net_namespace.h>
21#include <net/genetlink.h> 22#include <net/genetlink.h>
@@ -734,11 +735,12 @@ struct key_parse {
734 bool def_uni, def_multi; 735 bool def_uni, def_multi;
735}; 736};
736 737
737static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) 738static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key,
739 struct key_parse *k)
738{ 740{
739 struct nlattr *tb[NL80211_KEY_MAX + 1]; 741 struct nlattr *tb[NL80211_KEY_MAX + 1];
740 int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, 742 int err = nla_parse_nested(tb, NL80211_KEY_MAX, key,
741 nl80211_key_policy, NULL); 743 nl80211_key_policy, info->extack);
742 if (err) 744 if (err)
743 return err; 745 return err;
744 746
@@ -771,7 +773,8 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
771 if (tb[NL80211_KEY_TYPE]) { 773 if (tb[NL80211_KEY_TYPE]) {
772 k->type = nla_get_u32(tb[NL80211_KEY_TYPE]); 774 k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);
773 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) 775 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
774 return -EINVAL; 776 return genl_err_attr(info, -EINVAL,
777 tb[NL80211_KEY_TYPE]);
775 } 778 }
776 779
777 if (tb[NL80211_KEY_DEFAULT_TYPES]) { 780 if (tb[NL80211_KEY_DEFAULT_TYPES]) {
@@ -779,7 +782,8 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
779 782
780 err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, 783 err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
781 tb[NL80211_KEY_DEFAULT_TYPES], 784 tb[NL80211_KEY_DEFAULT_TYPES],
782 nl80211_key_default_policy, NULL); 785 nl80211_key_default_policy,
786 info->extack);
783 if (err) 787 if (err)
784 return err; 788 return err;
785 789
@@ -820,8 +824,10 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
820 824
821 if (info->attrs[NL80211_ATTR_KEY_TYPE]) { 825 if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
822 k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); 826 k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
823 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) 827 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) {
828 GENL_SET_ERR_MSG(info, "key type out of range");
824 return -EINVAL; 829 return -EINVAL;
830 }
825 } 831 }
826 832
827 if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { 833 if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
@@ -850,31 +856,42 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
850 k->type = -1; 856 k->type = -1;
851 857
852 if (info->attrs[NL80211_ATTR_KEY]) 858 if (info->attrs[NL80211_ATTR_KEY])
853 err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); 859 err = nl80211_parse_key_new(info, info->attrs[NL80211_ATTR_KEY], k);
854 else 860 else
855 err = nl80211_parse_key_old(info, k); 861 err = nl80211_parse_key_old(info, k);
856 862
857 if (err) 863 if (err)
858 return err; 864 return err;
859 865
860 if (k->def && k->defmgmt) 866 if (k->def && k->defmgmt) {
867 GENL_SET_ERR_MSG(info, "key with def && defmgmt is invalid");
861 return -EINVAL; 868 return -EINVAL;
869 }
862 870
863 if (k->defmgmt) { 871 if (k->defmgmt) {
864 if (k->def_uni || !k->def_multi) 872 if (k->def_uni || !k->def_multi) {
873 GENL_SET_ERR_MSG(info, "defmgmt key must be mcast");
865 return -EINVAL; 874 return -EINVAL;
875 }
866 } 876 }
867 877
868 if (k->idx != -1) { 878 if (k->idx != -1) {
869 if (k->defmgmt) { 879 if (k->defmgmt) {
870 if (k->idx < 4 || k->idx > 5) 880 if (k->idx < 4 || k->idx > 5) {
881 GENL_SET_ERR_MSG(info,
882 "defmgmt key idx not 4 or 5");
871 return -EINVAL; 883 return -EINVAL;
884 }
872 } else if (k->def) { 885 } else if (k->def) {
873 if (k->idx < 0 || k->idx > 3) 886 if (k->idx < 0 || k->idx > 3) {
887 GENL_SET_ERR_MSG(info, "def key idx not 0-3");
874 return -EINVAL; 888 return -EINVAL;
889 }
875 } else { 890 } else {
876 if (k->idx < 0 || k->idx > 5) 891 if (k->idx < 0 || k->idx > 5) {
892 GENL_SET_ERR_MSG(info, "key idx not 0-5");
877 return -EINVAL; 893 return -EINVAL;
894 }
878 } 895 }
879 } 896 }
880 897
@@ -883,8 +900,9 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
883 900
884static struct cfg80211_cached_keys * 901static struct cfg80211_cached_keys *
885nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, 902nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
886 struct nlattr *keys, bool *no_ht) 903 struct genl_info *info, bool *no_ht)
887{ 904{
905 struct nlattr *keys = info->attrs[NL80211_ATTR_KEYS];
888 struct key_parse parse; 906 struct key_parse parse;
889 struct nlattr *key; 907 struct nlattr *key;
890 struct cfg80211_cached_keys *result; 908 struct cfg80211_cached_keys *result;
@@ -909,17 +927,22 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
909 memset(&parse, 0, sizeof(parse)); 927 memset(&parse, 0, sizeof(parse));
910 parse.idx = -1; 928 parse.idx = -1;
911 929
912 err = nl80211_parse_key_new(key, &parse); 930 err = nl80211_parse_key_new(info, key, &parse);
913 if (err) 931 if (err)
914 goto error; 932 goto error;
915 err = -EINVAL; 933 err = -EINVAL;
916 if (!parse.p.key) 934 if (!parse.p.key)
917 goto error; 935 goto error;
918 if (parse.idx < 0 || parse.idx > 3) 936 if (parse.idx < 0 || parse.idx > 3) {
937 GENL_SET_ERR_MSG(info, "key index out of range [0-3]");
919 goto error; 938 goto error;
939 }
920 if (parse.def) { 940 if (parse.def) {
921 if (def) 941 if (def) {
942 GENL_SET_ERR_MSG(info,
943 "only one key can be default");
922 goto error; 944 goto error;
945 }
923 def = 1; 946 def = 1;
924 result->def = parse.idx; 947 result->def = parse.idx;
925 if (!parse.def_uni || !parse.def_multi) 948 if (!parse.def_uni || !parse.def_multi)
@@ -932,6 +955,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
932 goto error; 955 goto error;
933 if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && 956 if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 &&
934 parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) { 957 parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) {
958 GENL_SET_ERR_MSG(info, "connect key must be WEP");
935 err = -EINVAL; 959 err = -EINVAL;
936 goto error; 960 goto error;
937 } 961 }
@@ -947,6 +971,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
947 971
948 if (result->def < 0) { 972 if (result->def < 0) {
949 err = -EINVAL; 973 err = -EINVAL;
974 GENL_SET_ERR_MSG(info, "need a default/TX key");
950 goto error; 975 goto error;
951 } 976 }
952 977
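The key-parsing hunks above thread struct genl_info through so validation failures can return a human-readable reason via the netlink extended-ack mechanism instead of a bare -EINVAL; genl_err_attr() additionally points the ack at the offending attribute. The pattern in isolation (EXAMPLE_ATTR_FOO is hypothetical):

static int example_doit(struct sk_buff *skb, struct genl_info *info)
{
        if (!info->attrs[EXAMPLE_ATTR_FOO]) {
                /* The string travels back to userspace in the netlink ACK. */
                GENL_SET_ERR_MSG(info, "missing foo attribute");
                return -EINVAL;
        }
        return 0;
}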
@@ -2056,20 +2081,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
2056static int parse_txq_params(struct nlattr *tb[], 2081static int parse_txq_params(struct nlattr *tb[],
2057 struct ieee80211_txq_params *txq_params) 2082 struct ieee80211_txq_params *txq_params)
2058{ 2083{
2084 u8 ac;
2085
2059 if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || 2086 if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
2060 !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || 2087 !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
2061 !tb[NL80211_TXQ_ATTR_AIFS]) 2088 !tb[NL80211_TXQ_ATTR_AIFS])
2062 return -EINVAL; 2089 return -EINVAL;
2063 2090
2064 txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); 2091 ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
2065 txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); 2092 txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
2066 txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); 2093 txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
2067 txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); 2094 txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
2068 txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); 2095 txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
2069 2096
2070 if (txq_params->ac >= NL80211_NUM_ACS) 2097 if (ac >= NL80211_NUM_ACS)
2071 return -EINVAL; 2098 return -EINVAL;
2072 2099 txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
2073 return 0; 2100 return 0;
2074} 2101}
2075 2102
@@ -7820,6 +7847,11 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
7820 intbss->ts_boottime, NL80211_BSS_PAD)) 7847 intbss->ts_boottime, NL80211_BSS_PAD))
7821 goto nla_put_failure; 7848 goto nla_put_failure;
7822 7849
7850 if (!nl80211_put_signal(msg, intbss->pub.chains,
7851 intbss->pub.chain_signal,
7852 NL80211_BSS_CHAIN_SIGNAL))
7853 goto nla_put_failure;
7854
7823 switch (rdev->wiphy.signal_type) { 7855 switch (rdev->wiphy.signal_type) {
7824 case CFG80211_SIGNAL_TYPE_MBM: 7856 case CFG80211_SIGNAL_TYPE_MBM:
7825 if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal)) 7857 if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
@@ -8616,9 +8648,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
8616 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { 8648 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
8617 bool no_ht = false; 8649 bool no_ht = false;
8618 8650
8619 connkeys = nl80211_parse_connkeys(rdev, 8651 connkeys = nl80211_parse_connkeys(rdev, info, &no_ht);
8620 info->attrs[NL80211_ATTR_KEYS],
8621 &no_ht);
8622 if (IS_ERR(connkeys)) 8652 if (IS_ERR(connkeys))
8623 return PTR_ERR(connkeys); 8653 return PTR_ERR(connkeys);
8624 8654
@@ -9022,8 +9052,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
9022 } 9052 }
9023 9053
9024 if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { 9054 if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
9025 connkeys = nl80211_parse_connkeys(rdev, 9055 connkeys = nl80211_parse_connkeys(rdev, info, NULL);
9026 info->attrs[NL80211_ATTR_KEYS], NULL);
9027 if (IS_ERR(connkeys)) 9056 if (IS_ERR(connkeys))
9028 return PTR_ERR(connkeys); 9057 return PTR_ERR(connkeys);
9029 } 9058 }
@@ -13948,7 +13977,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
13948 13977
13949 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 13978 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
13950 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 13979 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
13951 (from_ap && reason && 13980 (reason &&
13952 nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason)) || 13981 nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason)) ||
13953 (from_ap && 13982 (from_ap &&
13954 nla_put_flag(msg, NL80211_ATTR_DISCONNECTED_BY_AP)) || 13983 nla_put_flag(msg, NL80211_ATTR_DISCONNECTED_BY_AP)) ||
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index f6c5fe482506..d36c3eb7b931 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -981,6 +981,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
981 found->ts = tmp->ts; 981 found->ts = tmp->ts;
982 found->ts_boottime = tmp->ts_boottime; 982 found->ts_boottime = tmp->ts_boottime;
983 found->parent_tsf = tmp->parent_tsf; 983 found->parent_tsf = tmp->parent_tsf;
984 found->pub.chains = tmp->pub.chains;
985 memcpy(found->pub.chain_signal, tmp->pub.chain_signal,
986 IEEE80211_MAX_CHAINS);
984 ether_addr_copy(found->parent_bssid, tmp->parent_bssid); 987 ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
985 } else { 988 } else {
986 struct cfg80211_internal_bss *new; 989 struct cfg80211_internal_bss *new;
@@ -1233,6 +1236,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
1233 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); 1236 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
1234 tmp.ts_boottime = data->boottime_ns; 1237 tmp.ts_boottime = data->boottime_ns;
1235 tmp.parent_tsf = data->parent_tsf; 1238 tmp.parent_tsf = data->parent_tsf;
1239 tmp.pub.chains = data->chains;
1240 memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
1236 ether_addr_copy(tmp.parent_bssid, data->parent_bssid); 1241 ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
1237 1242
1238 signal_valid = abs(data->chan->center_freq - channel->center_freq) <= 1243 signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
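The scan.c hunks copy the per-chain signal data into cached BSS entries, both when updating an existing entry and when informing from a received frame, matching the new NL80211_BSS_CHAIN_SIGNAL export in the nl80211_send_bss() hunk above. The fields this plumbs through, with shapes assumed from the memcpy size:

struct cfg80211_bss {
        /* ... */
        u8 chains;                              /* bitmask of chains that measured this BSS */
        s8 chain_signal[IEEE80211_MAX_CHAINS];  /* per-chain signal strength, dBm */
        /* ... */
};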
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index f3353fe5b35b..bcfedd39e7a3 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2544,20 +2544,20 @@ DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta,
2544); 2544);
2545 2545
2546TRACE_EVENT(cfg80211_rx_mgmt, 2546TRACE_EVENT(cfg80211_rx_mgmt,
2547 TP_PROTO(struct wireless_dev *wdev, int freq, int sig_mbm), 2547 TP_PROTO(struct wireless_dev *wdev, int freq, int sig_dbm),
2548 TP_ARGS(wdev, freq, sig_mbm), 2548 TP_ARGS(wdev, freq, sig_dbm),
2549 TP_STRUCT__entry( 2549 TP_STRUCT__entry(
2550 WDEV_ENTRY 2550 WDEV_ENTRY
2551 __field(int, freq) 2551 __field(int, freq)
2552 __field(int, sig_mbm) 2552 __field(int, sig_dbm)
2553 ), 2553 ),
2554 TP_fast_assign( 2554 TP_fast_assign(
2555 WDEV_ASSIGN; 2555 WDEV_ASSIGN;
2556 __entry->freq = freq; 2556 __entry->freq = freq;
2557 __entry->sig_mbm = sig_mbm; 2557 __entry->sig_dbm = sig_dbm;
2558 ), 2558 ),
2559 TP_printk(WDEV_PR_FMT ", freq: %d, sig mbm: %d", 2559 TP_printk(WDEV_PR_FMT ", freq: %d, sig dbm: %d",
2560 WDEV_PR_ARG, __entry->freq, __entry->sig_mbm) 2560 WDEV_PR_ARG, __entry->freq, __entry->sig_dbm)
2561); 2561);
2562 2562
2563TRACE_EVENT(cfg80211_mgmt_tx_status, 2563TRACE_EVENT(cfg80211_mgmt_tx_status,
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 6cdb054484d6..9efbfc753347 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev,
1035} 1035}
1036 1036
1037 1037
1038int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, 1038int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1039 void __user *arg)
1040{ 1039{
1041 struct iw_request_info info = { .cmd = cmd, .flags = 0 }; 1040 struct iw_request_info info = { .cmd = cmd, .flags = 0 };
1041 struct iwreq iwr;
1042 int ret; 1042 int ret;
1043 1043
1044 ret = wext_ioctl_dispatch(net, iwr, cmd, &info, 1044 if (copy_from_user(&iwr, arg, sizeof(iwr)))
1045 return -EFAULT;
1046
1047 iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
1048
1049 ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
1045 ioctl_standard_call, 1050 ioctl_standard_call,
1046 ioctl_private_call); 1051 ioctl_private_call);
1047 if (ret >= 0 && 1052 if (ret >= 0 &&
1048 IW_IS_GET(cmd) && 1053 IW_IS_GET(cmd) &&
1049 copy_to_user(arg, iwr, sizeof(struct iwreq))) 1054 copy_to_user(arg, &iwr, sizeof(struct iwreq)))
1050 return -EFAULT; 1055 return -EFAULT;
1051 1056
1052 return ret; 1057 return ret;
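The wext-core.c hunk moves the copy of struct iwreq from the ioctl caller into wext_handle_ioctl() itself and forcibly NUL-terminates ifr_name, so an unterminated name from userspace cannot cause an out-of-bounds read further down. The copy-and-terminate idiom in isolation:

struct iwreq iwr;

if (copy_from_user(&iwr, arg, sizeof(iwr)))
        return -EFAULT;
/* Never trust a fixed-size string from userspace to be terminated. */
iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = '\0';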
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index e98a01c1034f..5511f989ef47 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -133,7 +133,6 @@ static int seq_open_wireless(struct inode *inode, struct file *file)
133} 133}
134 134
135static const struct file_operations wireless_seq_fops = { 135static const struct file_operations wireless_seq_fops = {
136 .owner = THIS_MODULE,
137 .open = seq_open_wireless, 136 .open = seq_open_wireless,
138 .read = seq_read, 137 .read = seq_read,
139 .llseek = seq_lseek, 138 .llseek = seq_lseek,
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index ac9477189d1c..8e70291e586a 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -23,32 +23,114 @@
23#include <linux/notifier.h> 23#include <linux/notifier.h>
24 24
25#ifdef CONFIG_XFRM_OFFLOAD 25#ifdef CONFIG_XFRM_OFFLOAD
26int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) 26struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
27{ 27{
28 int err; 28 int err;
29 unsigned long flags;
29 struct xfrm_state *x; 30 struct xfrm_state *x;
31 struct sk_buff *skb2;
32 struct softnet_data *sd;
33 netdev_features_t esp_features = features;
30 struct xfrm_offload *xo = xfrm_offload(skb); 34 struct xfrm_offload *xo = xfrm_offload(skb);
31 35
32 if (skb_is_gso(skb)) 36 if (!xo)
33 return 0; 37 return skb;
34 38
35 if (xo) { 39 if (!(features & NETIF_F_HW_ESP))
36 x = skb->sp->xvec[skb->sp->len - 1]; 40 esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
37 if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) 41
38 return 0; 42 x = skb->sp->xvec[skb->sp->len - 1];
43 if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
44 return skb;
45
46 local_irq_save(flags);
47 sd = this_cpu_ptr(&softnet_data);
48 err = !skb_queue_empty(&sd->xfrm_backlog);
49 local_irq_restore(flags);
50
51 if (err) {
52 *again = true;
53 return skb;
54 }
55
56 if (skb_is_gso(skb)) {
57 struct net_device *dev = skb->dev;
58
59 if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) {
60 struct sk_buff *segs;
61
62 /* Packet got rerouted, fixup features and segment it. */
63 esp_features = esp_features & ~(NETIF_F_HW_ESP
64 | NETIF_F_GSO_ESP);
65
66 segs = skb_gso_segment(skb, esp_features);
67 if (IS_ERR(segs)) {
68 kfree_skb(skb);
69 atomic_long_inc(&dev->tx_dropped);
70 return NULL;
71 } else {
72 consume_skb(skb);
73 skb = segs;
74 }
75 }
76 }
39 77
78 if (!skb->next) {
40 x->outer_mode->xmit(x, skb); 79 x->outer_mode->xmit(x, skb);
41 80
42 err = x->type_offload->xmit(x, skb, features); 81 xo->flags |= XFRM_DEV_RESUME;
82
83 err = x->type_offload->xmit(x, skb, esp_features);
43 if (err) { 84 if (err) {
85 if (err == -EINPROGRESS)
86 return NULL;
87
44 XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); 88 XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
45 return err; 89 kfree_skb(skb);
90 return NULL;
46 } 91 }
47 92
48 skb_push(skb, skb->data - skb_mac_header(skb)); 93 skb_push(skb, skb->data - skb_mac_header(skb));
94
95 return skb;
49 } 96 }
50 97
51 return 0; 98 skb2 = skb;
99
100 do {
101 struct sk_buff *nskb = skb2->next;
102 skb2->next = NULL;
103
104 xo = xfrm_offload(skb2);
105 xo->flags |= XFRM_DEV_RESUME;
106
107 x->outer_mode->xmit(x, skb2);
108
109 err = x->type_offload->xmit(x, skb2, esp_features);
110 if (!err) {
111 skb2->next = nskb;
112 } else if (err != -EINPROGRESS) {
113 XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
114 skb2->next = nskb;
115 kfree_skb_list(skb2);
116 return NULL;
117 } else {
118 if (skb == skb2)
119 skb = nskb;
120
121 if (!skb)
122 return NULL;
123
124 goto skip_push;
125 }
126
127 skb_push(skb2, skb2->data - skb_mac_header(skb2));
128
129skip_push:
130 skb2 = nskb;
131 } while (skb2);
132
133 return skb;
52} 134}
53EXPORT_SYMBOL_GPL(validate_xmit_xfrm); 135EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
54 136
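validate_xmit_xfrm() now returns the (possibly replacement) skb instead of an error code, defers to a per-CPU backlog when earlier packets are still queued (signalled through *again), and handles GSO packets that were rerouted away from the offloading device by masking the ESP hardware features and segmenting in software. The segment-or-consume step in isolation (a sketch of the pattern, not the full function):

struct sk_buff *segs;

segs = skb_gso_segment(skb, esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP));
if (IS_ERR(segs)) {
        kfree_skb(skb);         /* could not segment: drop the packet */
        return NULL;
}
consume_skb(skb);               /* the original is replaced by the segment list */
skb = segs;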
@@ -65,9 +147,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
65 if (!x->type_offload) 147 if (!x->type_offload)
66 return -EINVAL; 148 return -EINVAL;
67 149
68 /* We don't yet support UDP encapsulation, TFC padding and ESN. */ 150 /* We don't yet support UDP encapsulation and TFC padding. */
69 if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) 151 if (x->encap || x->tfcpad)
70 return 0; 152 return -EINVAL;
71 153
72 dev = dev_get_by_index(net, xuo->ifindex); 154 dev = dev_get_by_index(net, xuo->ifindex);
73 if (!dev) { 155 if (!dev) {
@@ -96,6 +178,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
96 return 0; 178 return 0;
97 } 179 }
98 180
181 if (x->props.flags & XFRM_STATE_ESN &&
182 !dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
183 xso->dev = NULL;
184 dev_put(dev);
185 return -EINVAL;
186 }
187
99 xso->dev = dev; 188 xso->dev = dev;
100 xso->num_exthdrs = 1; 189 xso->num_exthdrs = 1;
101 xso->flags = xuo->flags; 190 xso->flags = xuo->flags;
@@ -121,8 +210,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
121 if (!x->type_offload || x->encap) 210 if (!x->type_offload || x->encap)
122 return false; 211 return false;
123 212
124 if ((x->xso.offload_handle && (dev == dst->path->dev)) && 213 if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) &&
125 !dst->child->xfrm && x->type->get_mtu) { 214 (!xdst->child->xfrm && x->type->get_mtu)) {
126 mtu = x->type->get_mtu(x, xdst->child_mtu_cached); 215 mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
127 216
128 if (skb->len <= mtu) 217 if (skb->len <= mtu)
@@ -141,19 +230,82 @@ ok:
141 return true; 230 return true;
142} 231}
143EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); 232EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
233
234void xfrm_dev_resume(struct sk_buff *skb)
235{
236 struct net_device *dev = skb->dev;
237 int ret = NETDEV_TX_BUSY;
238 struct netdev_queue *txq;
239 struct softnet_data *sd;
240 unsigned long flags;
241
242 rcu_read_lock();
243 txq = netdev_pick_tx(dev, skb, NULL);
244
245 HARD_TX_LOCK(dev, txq, smp_processor_id());
246 if (!netif_xmit_frozen_or_stopped(txq))
247 skb = dev_hard_start_xmit(skb, dev, txq, &ret);
248 HARD_TX_UNLOCK(dev, txq);
249
250 if (!dev_xmit_complete(ret)) {
251 local_irq_save(flags);
252 sd = this_cpu_ptr(&softnet_data);
253 skb_queue_tail(&sd->xfrm_backlog, skb);
254 raise_softirq_irqoff(NET_TX_SOFTIRQ);
255 local_irq_restore(flags);
256 }
257 rcu_read_unlock();
258}
259EXPORT_SYMBOL_GPL(xfrm_dev_resume);
260
261void xfrm_dev_backlog(struct softnet_data *sd)
262{
263 struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog;
264 struct sk_buff_head list;
265 struct sk_buff *skb;
266
267 if (skb_queue_empty(xfrm_backlog))
268 return;
269
270 __skb_queue_head_init(&list);
271
272 spin_lock(&xfrm_backlog->lock);
273 skb_queue_splice_init(xfrm_backlog, &list);
274 spin_unlock(&xfrm_backlog->lock);
275
276 while (!skb_queue_empty(&list)) {
277 skb = __skb_dequeue(&list);
278 xfrm_dev_resume(skb);
279 }
280
281}
144#endif 282#endif
145 283
146static int xfrm_dev_register(struct net_device *dev) 284static int xfrm_api_check(struct net_device *dev)
147{ 285{
148 if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) 286#ifdef CONFIG_XFRM_OFFLOAD
149 return NOTIFY_BAD;
150 if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && 287 if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
151 !(dev->features & NETIF_F_HW_ESP)) 288 !(dev->features & NETIF_F_HW_ESP))
152 return NOTIFY_BAD; 289 return NOTIFY_BAD;
153 290
291 if ((dev->features & NETIF_F_HW_ESP) &&
292 (!(dev->xfrmdev_ops &&
293 dev->xfrmdev_ops->xdo_dev_state_add &&
294 dev->xfrmdev_ops->xdo_dev_state_delete)))
295 return NOTIFY_BAD;
296#else
297 if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM))
298 return NOTIFY_BAD;
299#endif
300
154 return NOTIFY_DONE; 301 return NOTIFY_DONE;
155} 302}
156 303
304static int xfrm_dev_register(struct net_device *dev)
305{
306 return xfrm_api_check(dev);
307}
308
157static int xfrm_dev_unregister(struct net_device *dev) 309static int xfrm_dev_unregister(struct net_device *dev)
158{ 310{
159 xfrm_policy_cache_flush(); 311 xfrm_policy_cache_flush();
@@ -162,16 +314,7 @@ static int xfrm_dev_unregister(struct net_device *dev)
162 314
163static int xfrm_dev_feat_change(struct net_device *dev) 315static int xfrm_dev_feat_change(struct net_device *dev)
164{ 316{
165 if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) 317 return xfrm_api_check(dev);
166 return NOTIFY_BAD;
167 else if (!(dev->features & NETIF_F_HW_ESP))
168 dev->xfrmdev_ops = NULL;
169
170 if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
171 !(dev->features & NETIF_F_HW_ESP))
172 return NOTIFY_BAD;
173
174 return NOTIFY_DONE;
175} 318}
176 319
177static int xfrm_dev_down(struct net_device *dev) 320static int xfrm_dev_down(struct net_device *dev)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 5b2409746ae0..1472c0857975 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -257,7 +257,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
257 257
258 if (xo && (xo->flags & CRYPTO_DONE)) { 258 if (xo && (xo->flags & CRYPTO_DONE)) {
259 crypto_done = true; 259 crypto_done = true;
260 x = xfrm_input_state(skb);
261 family = XFRM_SPI_SKB_CB(skb)->family; 260 family = XFRM_SPI_SKB_CB(skb)->family;
262 261
263 if (!(xo->status & CRYPTO_SUCCESS)) { 262 if (!(xo->status & CRYPTO_SUCCESS)) {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 73ad8c8ef344..23468672a767 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -44,7 +44,7 @@ static int xfrm_skb_check_space(struct sk_buff *skb)
44 44
45static struct dst_entry *skb_dst_pop(struct sk_buff *skb) 45static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
46{ 46{
47 struct dst_entry *child = dst_clone(skb_dst(skb)->child); 47 struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));
48 48
49 skb_dst_drop(skb); 49 skb_dst_drop(skb);
50 return child; 50 return child;
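This and the xfrm_policy.c changes below are part of moving the bundle topology out of generic struct dst_entry: the child/path pointers live in struct xfrm_dst and are reached through accessors, and walkers that used dst->next now iterate an explicit bundle[] array. The accessors' assumed shapes (the real helpers live in <net/xfrm.h>):

static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
        if (dst->xfrm)
                return ((const struct xfrm_dst *)dst)->child;
#endif
        return NULL;    /* a plain dst has no xfrm child */
}

static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
        if (dst->xfrm)
                return ((const struct xfrm_dst *)dst)->path;
#endif
        return (struct dst_entry *)dst; /* a plain dst is its own path */
}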
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index bd6b0e7a0ee4..7a23078132cf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -54,7 +54,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
54static struct kmem_cache *xfrm_dst_cache __read_mostly; 54static struct kmem_cache *xfrm_dst_cache __read_mostly;
55static __read_mostly seqcount_t xfrm_policy_hash_generation; 55static __read_mostly seqcount_t xfrm_policy_hash_generation;
56 56
57static void xfrm_init_pmtu(struct dst_entry *dst); 57static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
58static int stale_bundle(struct dst_entry *dst); 58static int stale_bundle(struct dst_entry *dst);
59static int xfrm_bundle_ok(struct xfrm_dst *xdst); 59static int xfrm_bundle_ok(struct xfrm_dst *xdst);
60static void xfrm_policy_queue_process(struct timer_list *t); 60static void xfrm_policy_queue_process(struct timer_list *t);
@@ -1256,7 +1256,7 @@ EXPORT_SYMBOL(xfrm_policy_delete);
1256 1256
1257int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 1257int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1258{ 1258{
1259 struct net *net = xp_net(pol); 1259 struct net *net = sock_net(sk);
1260 struct xfrm_policy *old_pol; 1260 struct xfrm_policy *old_pol;
1261 1261
1262#ifdef CONFIG_XFRM_SUB_POLICY 1262#ifdef CONFIG_XFRM_SUB_POLICY
@@ -1543,7 +1543,9 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1543 */ 1543 */
1544 1544
1545static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, 1545static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1546 struct xfrm_state **xfrm, int nx, 1546 struct xfrm_state **xfrm,
1547 struct xfrm_dst **bundle,
1548 int nx,
1547 const struct flowi *fl, 1549 const struct flowi *fl,
1548 struct dst_entry *dst) 1550 struct dst_entry *dst)
1549{ 1551{
@@ -1551,8 +1553,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1551 unsigned long now = jiffies; 1553 unsigned long now = jiffies;
1552 struct net_device *dev; 1554 struct net_device *dev;
1553 struct xfrm_mode *inner_mode; 1555 struct xfrm_mode *inner_mode;
1554 struct dst_entry *dst_prev = NULL; 1556 struct xfrm_dst *xdst_prev = NULL;
1555 struct dst_entry *dst0 = NULL; 1557 struct xfrm_dst *xdst0 = NULL;
1556 int i = 0; 1558 int i = 0;
1557 int err; 1559 int err;
1558 int header_len = 0; 1560 int header_len = 0;
@@ -1578,13 +1580,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1578 goto put_states; 1580 goto put_states;
1579 } 1581 }
1580 1582
1581 if (!dst_prev) 1583 bundle[i] = xdst;
1582 dst0 = dst1; 1584 if (!xdst_prev)
1585 xdst0 = xdst;
1583 else 1586 else
1584 /* Ref count is taken during xfrm_alloc_dst() 1587 /* Ref count is taken during xfrm_alloc_dst()
1585 * No need to do dst_clone() on dst1 1588 * No need to do dst_clone() on dst1
1586 */ 1589 */
1587 dst_prev->child = dst1; 1590 xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
1588 1591
1589 if (xfrm[i]->sel.family == AF_UNSPEC) { 1592 if (xfrm[i]->sel.family == AF_UNSPEC) {
1590 inner_mode = xfrm_ip2inner_mode(xfrm[i], 1593 inner_mode = xfrm_ip2inner_mode(xfrm[i],
@@ -1621,8 +1624,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1621 dst1->input = dst_discard; 1624 dst1->input = dst_discard;
1622 dst1->output = inner_mode->afinfo->output; 1625 dst1->output = inner_mode->afinfo->output;
1623 1626
1624 dst1->next = dst_prev; 1627 xdst_prev = xdst;
1625 dst_prev = dst1;
1626 1628
1627 header_len += xfrm[i]->props.header_len; 1629 header_len += xfrm[i]->props.header_len;
1628 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) 1630 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
@@ -1630,40 +1632,39 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1630 trailer_len += xfrm[i]->props.trailer_len; 1632 trailer_len += xfrm[i]->props.trailer_len;
1631 } 1633 }
1632 1634
1633 dst_prev->child = dst; 1635 xfrm_dst_set_child(xdst_prev, dst);
1634 dst0->path = dst; 1636 xdst0->path = dst;
1635 1637
1636 err = -ENODEV; 1638 err = -ENODEV;
1637 dev = dst->dev; 1639 dev = dst->dev;
1638 if (!dev) 1640 if (!dev)
1639 goto free_dst; 1641 goto free_dst;
1640 1642
1641 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); 1643 xfrm_init_path(xdst0, dst, nfheader_len);
1642 xfrm_init_pmtu(dst_prev); 1644 xfrm_init_pmtu(bundle, nx);
1643
1644 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1645 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1646 1645
1647 err = xfrm_fill_dst(xdst, dev, fl); 1646 for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
1647 xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
1648 err = xfrm_fill_dst(xdst_prev, dev, fl);
1648 if (err) 1649 if (err)
1649 goto free_dst; 1650 goto free_dst;
1650 1651
1651 dst_prev->header_len = header_len; 1652 xdst_prev->u.dst.header_len = header_len;
1652 dst_prev->trailer_len = trailer_len; 1653 xdst_prev->u.dst.trailer_len = trailer_len;
1653 header_len -= xdst->u.dst.xfrm->props.header_len; 1654 header_len -= xdst_prev->u.dst.xfrm->props.header_len;
1654 trailer_len -= xdst->u.dst.xfrm->props.trailer_len; 1655 trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
1655 } 1656 }
1656 1657
1657out: 1658out:
1658 return dst0; 1659 return &xdst0->u.dst;
1659 1660
1660put_states: 1661put_states:
1661 for (; i < nx; i++) 1662 for (; i < nx; i++)
1662 xfrm_state_put(xfrm[i]); 1663 xfrm_state_put(xfrm[i]);
1663free_dst: 1664free_dst:
1664 if (dst0) 1665 if (xdst0)
1665 dst_release_immediate(dst0); 1666 dst_release_immediate(&xdst0->u.dst);
1666 dst0 = ERR_PTR(err); 1667 xdst0 = ERR_PTR(err);
1667 goto out; 1668 goto out;
1668} 1669}
1669 1670
@@ -1807,7 +1808,7 @@ static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
1807 for (i = 0; i < num; i++) { 1808 for (i = 0; i < num; i++) {
1808 if (!dst || dst->xfrm != xfrm[i]) 1809 if (!dst || dst->xfrm != xfrm[i])
1809 return false; 1810 return false;
1810 dst = dst->child; 1811 dst = xfrm_dst_child(dst);
1811 } 1812 }
1812 1813
1813 return xfrm_bundle_ok(xdst); 1814 return xfrm_bundle_ok(xdst);
@@ -1820,6 +1821,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1820{ 1821{
1821 struct net *net = xp_net(pols[0]); 1822 struct net *net = xp_net(pols[0]);
1822 struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; 1823 struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1824 struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
1823 struct xfrm_dst *xdst, *old; 1825 struct xfrm_dst *xdst, *old;
1824 struct dst_entry *dst; 1826 struct dst_entry *dst;
1825 int err; 1827 int err;
@@ -1848,7 +1850,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1848 1850
1849 old = xdst; 1851 old = xdst;
1850 1852
1851 dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); 1853 dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
1852 if (IS_ERR(dst)) { 1854 if (IS_ERR(dst)) {
1853 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); 1855 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1854 return ERR_CAST(dst); 1856 return ERR_CAST(dst);
@@ -1888,8 +1890,8 @@ static void xfrm_policy_queue_process(struct timer_list *t)
1888 xfrm_decode_session(skb, &fl, dst->ops->family); 1890 xfrm_decode_session(skb, &fl, dst->ops->family);
1889 spin_unlock(&pq->hold_queue.lock); 1891 spin_unlock(&pq->hold_queue.lock);
1890 1892
1891 dst_hold(dst->path); 1893 dst_hold(xfrm_dst_path(dst));
1892 dst = xfrm_lookup(net, dst->path, &fl, sk, 0); 1894 dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
1893 if (IS_ERR(dst)) 1895 if (IS_ERR(dst))
1894 goto purge_queue; 1896 goto purge_queue;
1895 1897
@@ -1918,8 +1920,8 @@ static void xfrm_policy_queue_process(struct timer_list *t)
1918 skb = __skb_dequeue(&list); 1920 skb = __skb_dequeue(&list);
1919 1921
1920 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); 1922 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
1921 dst_hold(skb_dst(skb)->path); 1923 dst_hold(xfrm_dst_path(skb_dst(skb)));
1922 dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0); 1924 dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
1923 if (IS_ERR(dst)) { 1925 if (IS_ERR(dst)) {
1924 kfree_skb(skb); 1926 kfree_skb(skb);
1925 continue; 1927 continue;
@@ -2020,8 +2022,8 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
2020 dst1->output = xdst_queue_output; 2022 dst1->output = xdst_queue_output;
2021 2023
2022 dst_hold(dst); 2024 dst_hold(dst);
2023 dst1->child = dst; 2025 xfrm_dst_set_child(xdst, dst);
2024 dst1->path = dst; 2026 xdst->path = dst;
2025 2027
2026 xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); 2028 xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
2027 2029
@@ -2590,7 +2592,7 @@ static int stale_bundle(struct dst_entry *dst)
2590 2592
2591void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2593void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2592{ 2594{
2593 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { 2595 while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
2594 dst->dev = dev_net(dev)->loopback_dev; 2596 dst->dev = dev_net(dev)->loopback_dev;
2595 dev_hold(dst->dev); 2597 dev_hold(dst->dev);
2596 dev_put(dev); 2598 dev_put(dev);
@@ -2614,13 +2616,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2614 return dst; 2616 return dst;
2615} 2617}
2616 2618
2617static void xfrm_init_pmtu(struct dst_entry *dst) 2619static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
2618{ 2620{
2619 do { 2621 while (nr--) {
2620 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2622 struct xfrm_dst *xdst = bundle[nr];
2621 u32 pmtu, route_mtu_cached; 2623 u32 pmtu, route_mtu_cached;
2624 struct dst_entry *dst;
2622 2625
2623 pmtu = dst_mtu(dst->child); 2626 dst = &xdst->u.dst;
2627 pmtu = dst_mtu(xfrm_dst_child(dst));
2624 xdst->child_mtu_cached = pmtu; 2628 xdst->child_mtu_cached = pmtu;
2625 2629
2626 pmtu = xfrm_state_mtu(dst->xfrm, pmtu); 2630 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
@@ -2632,7 +2636,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
2632 pmtu = route_mtu_cached; 2636 pmtu = route_mtu_cached;
2633 2637
2634 dst_metric_set(dst, RTAX_MTU, pmtu); 2638 dst_metric_set(dst, RTAX_MTU, pmtu);
2635 } while ((dst = dst->next)); 2639 }
2636} 2640}
2637 2641
2638/* Check that the bundle accepts the flow and its components are 2642/* Check that the bundle accepts the flow and its components are
@@ -2641,19 +2645,20 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
2641 2645
2642static int xfrm_bundle_ok(struct xfrm_dst *first) 2646static int xfrm_bundle_ok(struct xfrm_dst *first)
2643{ 2647{
2648 struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
2644 struct dst_entry *dst = &first->u.dst; 2649 struct dst_entry *dst = &first->u.dst;
2645 struct xfrm_dst *last; 2650 struct xfrm_dst *xdst;
2651 int start_from, nr;
2646 u32 mtu; 2652 u32 mtu;
2647 2653
2648 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || 2654 if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
2649 (dst->dev && !netif_running(dst->dev))) 2655 (dst->dev && !netif_running(dst->dev)))
2650 return 0; 2656 return 0;
2651 2657
2652 if (dst->flags & DST_XFRM_QUEUE) 2658 if (dst->flags & DST_XFRM_QUEUE)
2653 return 1; 2659 return 1;
2654 2660
2655 last = NULL; 2661 start_from = nr = 0;
2656
2657 do { 2662 do {
2658 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2663 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2659 2664
@@ -2665,9 +2670,11 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
2665 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2670 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2666 return 0; 2671 return 0;
2667 2672
2668 mtu = dst_mtu(dst->child); 2673 bundle[nr++] = xdst;
2674
2675 mtu = dst_mtu(xfrm_dst_child(dst));
2669 if (xdst->child_mtu_cached != mtu) { 2676 if (xdst->child_mtu_cached != mtu) {
2670 last = xdst; 2677 start_from = nr;
2671 xdst->child_mtu_cached = mtu; 2678 xdst->child_mtu_cached = mtu;
2672 } 2679 }
2673 2680
@@ -2675,30 +2682,30 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
2675 return 0; 2682 return 0;
2676 mtu = dst_mtu(xdst->route); 2683 mtu = dst_mtu(xdst->route);
2677 if (xdst->route_mtu_cached != mtu) { 2684 if (xdst->route_mtu_cached != mtu) {
2678 last = xdst; 2685 start_from = nr;
2679 xdst->route_mtu_cached = mtu; 2686 xdst->route_mtu_cached = mtu;
2680 } 2687 }
2681 2688
2682 dst = dst->child; 2689 dst = xfrm_dst_child(dst);
2683 } while (dst->xfrm); 2690 } while (dst->xfrm);
2684 2691
2685 if (likely(!last)) 2692 if (likely(!start_from))
2686 return 1; 2693 return 1;
2687 2694
2688 mtu = last->child_mtu_cached; 2695 xdst = bundle[start_from - 1];
2689 for (;;) { 2696 mtu = xdst->child_mtu_cached;
2690 dst = &last->u.dst; 2697 while (start_from--) {
2698 dst = &xdst->u.dst;
2691 2699
2692 mtu = xfrm_state_mtu(dst->xfrm, mtu); 2700 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2693 if (mtu > last->route_mtu_cached) 2701 if (mtu > xdst->route_mtu_cached)
2694 mtu = last->route_mtu_cached; 2702 mtu = xdst->route_mtu_cached;
2695 dst_metric_set(dst, RTAX_MTU, mtu); 2703 dst_metric_set(dst, RTAX_MTU, mtu);
2696 2704 if (!start_from)
2697 if (last == first)
2698 break; 2705 break;
2699 2706
2700 last = (struct xfrm_dst *)last->u.dst.next; 2707 xdst = bundle[start_from - 1];
2701 last->child_mtu_cached = mtu; 2708 xdst->child_mtu_cached = mtu;
2702 } 2709 }
2703 2710
2704 return 1; 2711 return 1;
@@ -2706,22 +2713,20 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
 
 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 {
-	return dst_metric_advmss(dst->path);
+	return dst_metric_advmss(xfrm_dst_path(dst));
 }
 
 static unsigned int xfrm_mtu(const struct dst_entry *dst)
 {
 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 
-	return mtu ? : dst_mtu(dst->path);
+	return mtu ? : dst_mtu(xfrm_dst_path(dst));
 }
 
 static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
 					const void *daddr)
 {
-	const struct dst_entry *path = dst->path;
-
-	for (; dst != path; dst = dst->child) {
+	while (dst->xfrm) {
 		const struct xfrm_state *xfrm = dst->xfrm;
 
 		if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
@@ -2730,6 +2735,8 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
 			daddr = xfrm->coaddr;
 		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
 			daddr = &xfrm->id.daddr;
+
+		dst = xfrm_dst_child(dst);
 	}
 	return daddr;
 }
@@ -2738,7 +2745,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
 					   struct sk_buff *skb,
 					   const void *daddr)
 {
-	const struct dst_entry *path = dst->path;
+	const struct dst_entry *path = xfrm_dst_path(dst);
 
 	if (!skb)
 		daddr = xfrm_get_dst_nexthop(dst, daddr);
@@ -2747,7 +2754,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
 
 static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 {
-	const struct dst_entry *path = dst->path;
+	const struct dst_entry *path = xfrm_dst_path(dst);
 
 	daddr = xfrm_get_dst_nexthop(dst, daddr);
 	path->ops->confirm_neigh(path, daddr);
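
The xfrm_bundle_ok() rework above replaces the `last` pointer walk with a fixed bundle[] array: every xfrm_dst visited on the way down is recorded, and if any cached MTU went stale, the MTU is re-clamped from the first stale entry back toward the top of the bundle. Below is a minimal userspace sketch of that backward propagation; fake_xdst and propagate_mtu() are illustrative stand-ins, not kernel API, and the per-state xfrm_state_mtu() adjustment is omitted.

/* Standalone sketch of the backward MTU walk in xfrm_bundle_ok(). */
#include <stdio.h>

#define XFRM_MAX_DEPTH 6

struct fake_xdst {
	unsigned int child_mtu_cached;
	unsigned int route_mtu_cached;
	unsigned int mtu;	/* stands in for dst_metric_set(RTAX_MTU) */
};

static void propagate_mtu(struct fake_xdst *bundle[], int start_from)
{
	struct fake_xdst *xdst = bundle[start_from - 1];
	unsigned int mtu = xdst->child_mtu_cached;

	while (start_from--) {
		/* clamp to the route MTU cached at this level */
		if (mtu > xdst->route_mtu_cached)
			mtu = xdst->route_mtu_cached;
		xdst->mtu = mtu;
		if (!start_from)
			break;
		/* move one level up and refresh its child MTU cache */
		xdst = bundle[start_from - 1];
		xdst->child_mtu_cached = mtu;
	}
}

int main(void)
{
	struct fake_xdst a = { .child_mtu_cached = 1500, .route_mtu_cached = 1400 };
	struct fake_xdst b = { .child_mtu_cached = 1500, .route_mtu_cached = 1300 };
	struct fake_xdst *bundle[XFRM_MAX_DEPTH] = { &a, &b };

	propagate_mtu(bundle, 2);	/* entry b (index 1) was stale */
	printf("mtu: a=%u b=%u\n", a.mtu, b.mtu);	/* both end up 1300 */
	return 0;
}

Compared with the old code, which re-walked the bundle via next pointers from `last` down to `first`, indexing a recorded array avoids depending on the chain linkage at all, which is what allows the follow-up removal of dst->next and dst->child from struct dst_entry.
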
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index ba2b539879bc..6d5f85f4e672 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -71,7 +71,6 @@ static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations xfrm_statistics_seq_fops = {
-	.owner	 = THIS_MODULE,
 	.open	 = xfrm_statistics_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 8b23c5bcf8e8..1d38c6acf8af 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
 		bitnr = replay_esn->replay_window - (diff - pos);
 	}
 
+	xfrm_dev_state_advance_esn(x);
+
 	nr = bitnr >> 5;
 	bitnr = bitnr & 0x1F;
 	replay_esn->bmp[nr] |= (1U << bitnr);
@@ -666,7 +668,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 		if (unlikely(oseq < replay_esn->oseq)) {
 			XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
 			xo->seq.hi = oseq_hi;
-
+			replay_esn->oseq_hi = oseq_hi;
 			if (replay_esn->oseq_hi == 0) {
 				replay_esn->oseq--;
 				replay_esn->oseq_hi--;
@@ -678,7 +680,6 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 		}
 
 		replay_esn->oseq = oseq;
-		replay_esn->oseq_hi = oseq_hi;
 
 	if (xfrm_aevent_is_on(net))
 		x->repl->notify(x, XFRM_REPLAY_UPDATE);
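
The two xfrm_replay_overflow_offload_esn() hunks commit the new oseq_hi value as soon as the low word wraps, rather than only at the end of the function, so the subsequent overflow check reads the incremented value back. A small userspace sketch of that split 64-bit ESN bookkeeping follows; struct esn and esn_next() are illustrative names, not the kernel structures, and the offload error path is reduced to a return code.

/* Sketch: extended sequence number kept as two 32-bit halves. */
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

struct esn {
	uint32_t oseq;		/* low 32 bits */
	uint32_t oseq_hi;	/* high 32 bits */
};

/* Returns 0 on success, -1 once the full 64-bit space is exhausted. */
static int esn_next(struct esn *e)
{
	if (++e->oseq == 0) {		/* low word wrapped */
		e->oseq_hi++;		/* commit hi before checking it */
		if (e->oseq_hi == 0) {	/* 64-bit overflow: roll back */
			e->oseq--;
			e->oseq_hi--;
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct esn e = { .oseq = UINT32_MAX, .oseq_hi = 0 };

	esn_next(&e);	/* wraps the low word, bumps the high word */
	printf("seq = %" PRIu32 ":%" PRIu32 "\n", e.oseq_hi, e.oseq);
	return 0;
}
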
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a3785f538018..54e21f19d722 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2056,6 +2056,13 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	struct xfrm_mgr *km;
 	struct xfrm_policy *pol = NULL;
 
+	if (!optval && !optlen) {
+		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
+		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
+		__sk_dst_reset(sk);
+		return 0;
+	}
+
 	if (optlen <= 0 || optlen > PAGE_SIZE)
 		return -EMSGSIZE;
 
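
The xfrm_user_policy() hunk above adds a fast path for a NULL optval with zero optlen: it clears both the inbound and outbound per-socket policy and resets the socket's cached route. From userspace that corresponds to a zero-length setsockopt() on the XFRM policy option; the following is a minimal sketch under that assumption. The IP_XFRM_POLICY fallback value mirrors include/uapi/linux/in.h, and the call path enforces CAP_NET_ADMIN, so expect EPERM without it.

/* Sketch: clear a per-socket IPsec policy with a zero-length optval. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IP_XFRM_POLICY
#define IP_XFRM_POLICY 17	/* from include/uapi/linux/in.h */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* NULL/0 now clears both directions instead of returning -EMSGSIZE. */
	if (setsockopt(fd, IPPROTO_IP, IP_XFRM_POLICY, NULL, 0) < 0)
		perror("setsockopt(IP_XFRM_POLICY)");

	close(fd);
	return 0;
}
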