path: root/net
author    Takashi Iwai <tiwai@suse.de>  2015-07-24 14:08:13 -0400
committer Takashi Iwai <tiwai@suse.de>  2015-07-24 14:08:13 -0400
commit    43cbf02e7ad51007af38f39c5b2abdc7a5d7f5aa (patch)
tree      1057babea8807af3f4a3c44fd116b7bbe99eb733 /net
parent    cba59972a1191a0c1647a52fe745eed7a4b34b38 (diff)
parent    996034b117b467709dec7811ef134063934fa626 (diff)
Merge tag 'asoc-fix-v4.2-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v4.2

A lot of small fixes here, a few to the core:
 - Fix for binding DAPM stream widgets on devices with prefixes assigned to them
 - Minor fixes for the newly added topology interfaces
 - Locking and memory leak fixes for DAPM
 - Driver specific fixes
Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan.c | 96
-rw-r--r-- net/9p/client.c | 12
-rw-r--r-- net/9p/trans_rdma.c | 4
-rw-r--r-- net/Kconfig | 3
-rw-r--r-- net/appletalk/ddp.c | 2
-rw-r--r-- net/atm/common.c | 4
-rw-r--r-- net/atm/common.h | 2
-rw-r--r-- net/atm/pvc.c | 2
-rw-r--r-- net/atm/svc.c | 2
-rw-r--r-- net/ax25/af_ax25.c | 35
-rw-r--r-- net/ax25/ax25_in.c | 3
-rw-r--r-- net/ax25/ax25_ip.c | 1
-rw-r--r-- net/ax25/ax25_out.c | 1
-rw-r--r-- net/ax25/ax25_uid.c | 1
-rw-r--r-- net/batman-adv/Makefile | 6
-rw-r--r-- net/batman-adv/bat_algo.h | 2
-rw-r--r-- net/batman-adv/bat_iv_ogm.c | 210
-rw-r--r-- net/batman-adv/bitarray.c | 6
-rw-r--r-- net/batman-adv/bitarray.h | 8
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.c | 56
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.h | 12
-rw-r--r-- net/batman-adv/debugfs.c | 47
-rw-r--r-- net/batman-adv/debugfs.h | 43
-rw-r--r-- net/batman-adv/distributed-arp-table.c | 47
-rw-r--r-- net/batman-adv/distributed-arp-table.h | 15
-rw-r--r-- net/batman-adv/fragmentation.c | 44
-rw-r--r-- net/batman-adv/fragmentation.h | 11
-rw-r--r-- net/batman-adv/gateway_client.c | 41
-rw-r--r-- net/batman-adv/gateway_client.h | 10
-rw-r--r-- net/batman-adv/gateway_common.c | 13
-rw-r--r-- net/batman-adv/gateway_common.h | 9
-rw-r--r-- net/batman-adv/hard-interface.c | 40
-rw-r--r-- net/batman-adv/hard-interface.h | 13
-rw-r--r-- net/batman-adv/hash.c | 8
-rw-r--r-- net/batman-adv/hash.h | 33
-rw-r--r-- net/batman-adv/icmp_socket.c | 35
-rw-r--r-- net/batman-adv/icmp_socket.h | 9
-rw-r--r-- net/batman-adv/main.c | 103
-rw-r--r-- net/batman-adv/main.h | 40
-rw-r--r-- net/batman-adv/multicast.c | 31
-rw-r--r-- net/batman-adv/multicast.h | 8
-rw-r--r-- net/batman-adv/network-coding.c | 53
-rw-r--r-- net/batman-adv/network-coding.h | 15
-rw-r--r-- net/batman-adv/originator.c | 36
-rw-r--r-- net/batman-adv/originator.h | 28
-rw-r--r-- net/batman-adv/packet.h | 5
-rw-r--r-- net/batman-adv/routing.c | 38
-rw-r--r-- net/batman-adv/routing.h | 12
-rw-r--r-- net/batman-adv/send.c | 40
-rw-r--r-- net/batman-adv/send.h | 15
-rw-r--r-- net/batman-adv/soft-interface.c | 72
-rw-r--r-- net/batman-adv/soft-interface.h | 13
-rw-r--r-- net/batman-adv/sysfs.c | 62
-rw-r--r-- net/batman-adv/sysfs.h | 12
-rw-r--r-- net/batman-adv/translation-table.c | 91
-rw-r--r-- net/batman-adv/translation-table.h | 11
-rw-r--r-- net/batman-adv/types.h | 33
-rw-r--r-- net/bluetooth/6lowpan.c | 15
-rw-r--r-- net/bluetooth/Makefile | 3
-rw-r--r-- net/bluetooth/bnep/sock.c | 2
-rw-r--r-- net/bluetooth/cmtp/sock.c | 2
-rw-r--r-- net/bluetooth/hci_conn.c | 4
-rw-r--r-- net/bluetooth/hci_core.c | 153
-rw-r--r-- net/bluetooth/hci_event.c | 113
-rw-r--r-- net/bluetooth/hci_sock.c | 32
-rw-r--r-- net/bluetooth/hidp/core.c | 1
-rw-r--r-- net/bluetooth/hidp/sock.c | 2
-rw-r--r-- net/bluetooth/l2cap_core.c | 17
-rw-r--r-- net/bluetooth/l2cap_sock.c | 10
-rw-r--r-- net/bluetooth/mgmt.c | 576
-rw-r--r-- net/bluetooth/rfcomm/core.c | 2
-rw-r--r-- net/bluetooth/rfcomm/sock.c | 28
-rw-r--r-- net/bluetooth/sco.c | 13
-rw-r--r-- net/bluetooth/smp.c | 158
-rw-r--r-- net/bridge/Makefile | 2
-rw-r--r-- net/bridge/br.c | 22
-rw-r--r-- net/bridge/br_fdb.c | 38
-rw-r--r-- net/bridge/br_forward.c | 1
-rw-r--r-- net/bridge/br_if.c | 4
-rw-r--r-- net/bridge/br_ioctl.c | 2
-rw-r--r-- net/bridge/br_mdb.c | 16
-rw-r--r-- net/bridge/br_multicast.c | 266
-rw-r--r-- net/bridge/br_netfilter_hooks.c (renamed from net/bridge/br_netfilter.c) | 322
-rw-r--r-- net/bridge/br_netfilter_ipv6.c | 245
-rw-r--r-- net/bridge/br_netlink.c | 26
-rw-r--r-- net/bridge/br_private.h | 13
-rw-r--r-- net/bridge/br_stp.c | 13
-rw-r--r-- net/bridge/br_stp_if.c | 6
-rw-r--r-- net/bridge/br_sysfs_if.c | 2
-rw-r--r-- net/bridge/br_vlan.c | 60
-rw-r--r-- net/bridge/netfilter/ebt_stp.c | 6
-rw-r--r-- net/bridge/netfilter/ebtables.c | 4
-rw-r--r-- net/caif/caif_socket.c | 2
-rw-r--r-- net/can/af_can.c | 8
-rw-r--r-- net/can/bcm.c | 2
-rw-r--r-- net/can/gw.c | 68
-rw-r--r-- net/can/raw.c | 7
-rw-r--r-- net/ceph/ceph_common.c | 66
-rw-r--r-- net/ceph/crush/crush.c | 13
-rw-r--r-- net/ceph/crush/crush_ln_table.h | 32
-rw-r--r-- net/ceph/crush/hash.c | 8
-rw-r--r-- net/ceph/crush/mapper.c | 148
-rw-r--r-- net/ceph/messenger.c | 29
-rw-r--r-- net/ceph/mon_client.c | 13
-rw-r--r-- net/ceph/osd_client.c | 42
-rw-r--r-- net/ceph/osdmap.c | 2
-rw-r--r-- net/ceph/pagevec.c | 5
-rw-r--r-- net/core/dev.c | 263
-rw-r--r-- net/core/ethtool.c | 13
-rw-r--r-- net/core/filter.c | 261
-rw-r--r-- net/core/flow_dissector.c | 658
-rw-r--r-- net/core/gen_estimator.c | 13
-rw-r--r-- net/core/neighbour.c | 16
-rw-r--r-- net/core/net-sysfs.c | 10
-rw-r--r-- net/core/net_namespace.c | 133
-rw-r--r-- net/core/netevent.c | 5
-rw-r--r-- net/core/pktgen.c | 121
-rw-r--r-- net/core/rtnetlink.c | 268
-rw-r--r-- net/core/secure_seq.c | 2
-rw-r--r-- net/core/skbuff.c | 387
-rw-r--r-- net/core/sock.c | 56
-rw-r--r-- net/core/sock_diag.c | 85
-rw-r--r-- net/core/stream.c | 6
-rw-r--r-- net/core/utils.c | 12
-rw-r--r-- net/dccp/diag.c | 1
-rw-r--r-- net/decnet/af_decnet.c | 8
-rw-r--r-- net/dsa/dsa.c | 6
-rw-r--r-- net/dsa/slave.c | 56
-rw-r--r-- net/ethernet/eth.c | 15
-rw-r--r-- net/ieee802154/6lowpan/core.c | 28
-rw-r--r-- net/ieee802154/6lowpan/tx.c | 5
-rw-r--r-- net/ieee802154/core.c | 2
-rw-r--r-- net/ieee802154/nl-mac.c | 39
-rw-r--r-- net/ieee802154/nl-phy.c | 10
-rw-r--r-- net/ieee802154/nl802154.c | 316
-rw-r--r-- net/ieee802154/rdev-ops.h | 23
-rw-r--r-- net/ieee802154/socket.c | 28
-rw-r--r-- net/ieee802154/trace.h | 38
-rw-r--r-- net/ipv4/Kconfig | 24
-rw-r--r-- net/ipv4/Makefile | 3
-rw-r--r-- net/ipv4/af_inet.c | 13
-rw-r--r-- net/ipv4/devinet.c | 15
-rw-r--r-- net/ipv4/esp4.c | 201
-rw-r--r-- net/ipv4/fib_frontend.c | 29
-rw-r--r-- net/ipv4/fib_rules.c | 5
-rw-r--r-- net/ipv4/fib_semantics.c | 97
-rw-r--r-- net/ipv4/fib_trie.c | 75
-rw-r--r-- net/ipv4/geneve_core.c (renamed from net/ipv4/geneve.c) | 10
-rw-r--r-- net/ipv4/igmp.c | 162
-rw-r--r-- net/ipv4/inet_connection_sock.c | 19
-rw-r--r-- net/ipv4/inet_diag.c | 60
-rw-r--r-- net/ipv4/inet_hashtables.c | 57
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 2
-rw-r--r-- net/ipv4/ip_forward.c | 18
-rw-r--r-- net/ipv4/ip_fragment.c | 46
-rw-r--r-- net/ipv4/ip_output.c | 88
-rw-r--r-- net/ipv4/ip_sockglue.c | 18
-rw-r--r-- net/ipv4/ip_tunnel.c | 8
-rw-r--r-- net/ipv4/ip_tunnel_core.c | 20
-rw-r--r-- net/ipv4/ipip.c | 3
-rw-r--r-- net/ipv4/netfilter.c | 9
-rw-r--r-- net/ipv4/netfilter/Kconfig | 3
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 111
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 99
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5
-rw-r--r-- net/ipv4/netfilter/ipt_SYNPROXY.c | 4
-rw-r--r-- net/ipv4/netfilter/ipt_rpfilter.c | 2
-rw-r--r-- net/ipv4/proc.c | 2
-rw-r--r-- net/ipv4/route.c | 33
-rw-r--r-- net/ipv4/syncookies.c | 10
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 25
-rw-r--r-- net/ipv4/tcp.c | 100
-rw-r--r-- net/ipv4/tcp_cdg.c | 433
-rw-r--r-- net/ipv4/tcp_dctcp.c | 26
-rw-r--r-- net/ipv4/tcp_diag.c | 6
-rw-r--r-- net/ipv4/tcp_fastopen.c | 2
-rw-r--r-- net/ipv4/tcp_input.c | 139
-rw-r--r-- net/ipv4/tcp_ipv4.c | 15
-rw-r--r-- net/ipv4/tcp_minisocks.c | 4
-rw-r--r-- net/ipv4/tcp_offload.c | 4
-rw-r--r-- net/ipv4/tcp_output.c | 106
-rw-r--r-- net/ipv4/tcp_timer.c | 4
-rw-r--r-- net/ipv4/udp_diag.c | 2
-rw-r--r-- net/ipv4/udp_tunnel.c | 8
-rw-r--r-- net/ipv6/Makefile | 1
-rw-r--r-- net/ipv6/addrconf.c | 2
-rw-r--r-- net/ipv6/af_inet6.c | 6
-rw-r--r-- net/ipv6/datagram.c | 12
-rw-r--r-- net/ipv6/esp6.c | 201
-rw-r--r-- net/ipv6/icmp.c | 6
-rw-r--r-- net/ipv6/inet6_hashtables.c | 8
-rw-r--r-- net/ipv6/ip6_fib.c | 25
-rw-r--r-- net/ipv6/ip6_flowlabel.c | 4
-rw-r--r-- net/ipv6/ip6_input.c | 6
-rw-r--r-- net/ipv6/ip6_output.c | 60
-rw-r--r-- net/ipv6/ip6_tunnel.c | 2
-rw-r--r-- net/ipv6/ip6_udp_tunnel.c | 6
-rw-r--r-- net/ipv6/mcast_snoop.c | 213
-rw-r--r-- net/ipv6/ndisc.c | 2
-rw-r--r-- net/ipv6/netfilter.c | 2
-rw-r--r-- net/ipv6/netfilter/Kconfig | 3
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 100
-rw-r--r-- net/ipv6/output_core.c | 14
-rw-r--r-- net/ipv6/raw.c | 11
-rw-r--r-- net/ipv6/route.c | 558
-rw-r--r-- net/ipv6/syncookies.c | 19
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c | 8
-rw-r--r-- net/ipv6/tcp_ipv6.c | 16
-rw-r--r-- net/ipv6/xfrm6_policy.c | 20
-rw-r--r-- net/ipx/af_ipx.c | 2
-rw-r--r-- net/irda/af_irda.c | 2
-rw-r--r-- net/irda/timer.c | 4
-rw-r--r-- net/iucv/af_iucv.c | 10
-rw-r--r-- net/key/af_key.c | 3
-rw-r--r-- net/l2tp/l2tp_core.c | 15
-rw-r--r-- net/l2tp/l2tp_ppp.c | 4
-rw-r--r-- net/llc/af_llc.c | 2
-rw-r--r-- net/llc/llc_conn.c | 6
-rw-r--r-- net/mac80211/Kconfig | 16
-rw-r--r-- net/mac80211/aes_ccm.c | 33
-rw-r--r-- net/mac80211/aes_gcm.c | 33
-rw-r--r-- net/mac80211/aes_gmac.c | 14
-rw-r--r-- net/mac80211/agg-tx.c | 4
-rw-r--r-- net/mac80211/cfg.c | 214
-rw-r--r-- net/mac80211/chan.c | 10
-rw-r--r-- net/mac80211/debugfs.c | 177
-rw-r--r-- net/mac80211/debugfs_key.c | 17
-rw-r--r-- net/mac80211/debugfs_sta.c | 85
-rw-r--r-- net/mac80211/driver-ops.h | 13
-rw-r--r-- net/mac80211/ethtool.c | 3
-rw-r--r-- net/mac80211/ibss.c | 6
-rw-r--r-- net/mac80211/ieee80211_i.h | 36
-rw-r--r-- net/mac80211/iface.c | 74
-rw-r--r-- net/mac80211/key.c | 109
-rw-r--r-- net/mac80211/key.h | 7
-rw-r--r-- net/mac80211/led.c | 268
-rw-r--r-- net/mac80211/led.h | 44
-rw-r--r-- net/mac80211/main.c | 34
-rw-r--r-- net/mac80211/mesh.c | 1
-rw-r--r-- net/mac80211/mesh_hwmp.c | 35
-rw-r--r-- net/mac80211/mesh_plink.c | 44
-rw-r--r-- net/mac80211/mlme.c | 247
-rw-r--r-- net/mac80211/offchannel.c | 2
-rw-r--r-- net/mac80211/pm.c | 4
-rw-r--r-- net/mac80211/rate.c | 18
-rw-r--r-- net/mac80211/rate.h | 14
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.c | 2
-rw-r--r-- net/mac80211/rx.c | 227
-rw-r--r-- net/mac80211/scan.c | 18
-rw-r--r-- net/mac80211/sta_info.c | 24
-rw-r--r-- net/mac80211/sta_info.h | 44
-rw-r--r-- net/mac80211/status.c | 163
-rw-r--r-- net/mac80211/tdls.c | 52
-rw-r--r-- net/mac80211/trace.h | 42
-rw-r--r-- net/mac80211/tx.c | 549
-rw-r--r-- net/mac80211/util.c | 6
-rw-r--r-- net/mac80211/wpa.c | 10
-rw-r--r-- net/mac802154/Kconfig | 1
-rw-r--r-- net/mac802154/Makefile | 4
-rw-r--r-- net/mac802154/cfg.c | 101
-rw-r--r-- net/mac802154/driver-ops.h | 96
-rw-r--r-- net/mac802154/ieee802154_i.h | 9
-rw-r--r-- net/mac802154/iface.c | 156
-rw-r--r-- net/mac802154/llsec.c | 44
-rw-r--r-- net/mac802154/mac_cmd.c | 42
-rw-r--r-- net/mac802154/main.c | 32
-rw-r--r-- net/mac802154/mib.c | 63
-rw-r--r-- net/mac802154/rx.c | 13
-rw-r--r-- net/mac802154/trace.c | 9
-rw-r--r-- net/mac802154/trace.h | 272
-rw-r--r-- net/mac802154/util.c | 5
-rw-r--r-- net/mpls/mpls_gso.c | 2
-rw-r--r-- net/netfilter/Kconfig | 31
-rw-r--r-- net/netfilter/Makefile | 1
-rw-r--r-- net/netfilter/core.c | 38
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_gen.h | 44
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c | 44
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 59
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_port.c | 27
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 387
-rw-r--r-- net/netfilter/ipset/ip_set_getport.c | 19
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h | 736
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ip.c | 72
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipmark.c | 87
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipport.c | 98
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportip.c | 91
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 96
-rw-r--r-- net/netfilter/ipset/ip_set_hash_mac.c | 30
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c | 73
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c | 250
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netnet.c | 146
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c | 86
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c | 176
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c | 422
-rw-r--r-- net/netfilter/ipset/pfxlen.c | 16
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 30
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 19
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c | 8
-rw-r--r-- net/netfilter/nf_internals.h | 1
-rw-r--r-- net/netfilter/nf_queue.c | 19
-rw-r--r-- net/netfilter/nf_synproxy_core.c | 1
-rw-r--r-- net/netfilter/nf_tables_api.c | 117
-rw-r--r-- net/netfilter/nf_tables_core.c | 7
-rw-r--r-- net/netfilter/nf_tables_netdev.c | 258
-rw-r--r-- net/netfilter/nfnetlink.c | 38
-rw-r--r-- net/netfilter/nfnetlink_log.c | 2
-rw-r--r-- net/netfilter/nfnetlink_queue_core.c | 63
-rw-r--r-- net/netfilter/nft_compat.c | 2
-rw-r--r-- net/netfilter/x_tables.c | 55
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 6
-rw-r--r-- net/netfilter/xt_TEE.c | 1
-rw-r--r-- net/netfilter/xt_addrtype.c | 2
-rw-r--r-- net/netfilter/xt_mark.c | 1
-rw-r--r-- net/netfilter/xt_set.c | 47
-rw-r--r-- net/netfilter/xt_socket.c | 59
-rw-r--r-- net/netlink/af_netlink.c | 172
-rw-r--r-- net/netrom/af_netrom.c | 4
-rw-r--r-- net/netrom/nr_route.c | 1
-rw-r--r-- net/nfc/af_nfc.c | 2
-rw-r--r-- net/nfc/llcp.h | 2
-rw-r--r-- net/nfc/llcp_core.c | 2
-rw-r--r-- net/nfc/llcp_sock.c | 8
-rw-r--r-- net/nfc/nci/Kconfig | 7
-rw-r--r-- net/nfc/nci/Makefile | 3
-rw-r--r-- net/nfc/nci/core.c | 105
-rw-r--r-- net/nfc/nci/hci.c | 11
-rw-r--r-- net/nfc/nci/ntf.c | 10
-rw-r--r-- net/nfc/nci/rsp.c | 10
-rw-r--r-- net/nfc/nci/uart.c | 494
-rw-r--r-- net/nfc/netlink.c | 55
-rw-r--r-- net/nfc/nfc.h | 2
-rw-r--r-- net/nfc/rawsock.c | 4
-rw-r--r-- net/openvswitch/Kconfig | 2
-rw-r--r-- net/openvswitch/actions.c | 23
-rw-r--r-- net/openvswitch/datapath.c | 20
-rw-r--r-- net/openvswitch/datapath.h | 2
-rw-r--r-- net/openvswitch/flow.c | 4
-rw-r--r-- net/openvswitch/flow_netlink.c | 2
-rw-r--r-- net/openvswitch/vport-geneve.c | 5
-rw-r--r-- net/packet/af_packet.c | 198
-rw-r--r-- net/packet/internal.h | 13
-rw-r--r-- net/phonet/af_phonet.c | 2
-rw-r--r-- net/phonet/pep.c | 2
-rw-r--r-- net/rds/af_rds.c | 52
-rw-r--r-- net/rds/bind.c | 4
-rw-r--r-- net/rds/ib.h | 23
-rw-r--r-- net/rds/ib_cm.c | 43
-rw-r--r-- net/rds/ib_rdma.c | 4
-rw-r--r-- net/rds/ib_recv.c | 4
-rw-r--r-- net/rds/ib_send.c | 55
-rw-r--r-- net/rds/iw_cm.c | 7
-rw-r--r-- net/rds/iw_send.c | 18
-rw-r--r-- net/rds/rdma_transport.c | 34
-rw-r--r-- net/rds/rds.h | 9
-rw-r--r-- net/rds/transport.c | 23
-rw-r--r-- net/rfkill/core.c | 12
-rw-r--r-- net/rfkill/rfkill-gpio.c | 24
-rw-r--r-- net/rose/af_rose.c | 7
-rw-r--r-- net/rose/rose_link.c | 1
-rw-r--r-- net/rose/rose_route.c | 1
-rw-r--r-- net/rxrpc/af_rxrpc.c | 2
-rw-r--r-- net/rxrpc/ar-local.c | 4
-rw-r--r-- net/sched/Kconfig | 11
-rw-r--r-- net/sched/Makefile | 1
-rw-r--r-- net/sched/act_api.c | 5
-rw-r--r-- net/sched/act_bpf.c | 9
-rw-r--r-- net/sched/act_mirred.c | 2
-rw-r--r-- net/sched/act_pedit.c | 5
-rw-r--r-- net/sched/cls_bpf.c | 16
-rw-r--r-- net/sched/cls_flow.c | 28
-rw-r--r-- net/sched/cls_flower.c | 691
-rw-r--r-- net/sched/em_ipset.c | 4
-rw-r--r-- net/sched/sch_api.c | 17
-rw-r--r-- net/sched/sch_choke.c | 20
-rw-r--r-- net/sched/sch_codel.c | 15
-rw-r--r-- net/sched/sch_fq_codel.c | 26
-rw-r--r-- net/sched/sch_gred.c | 28
-rw-r--r-- net/sched/sch_hhf.c | 19
-rw-r--r-- net/sched/sch_ingress.c | 59
-rw-r--r-- net/sched/sch_netem.c | 4
-rw-r--r-- net/sched/sch_qfq.c | 3
-rw-r--r-- net/sched/sch_sfb.c | 24
-rw-r--r-- net/sched/sch_sfq.c | 27
-rw-r--r-- net/sctp/ipv6.c | 7
-rw-r--r-- net/sctp/output.c | 4
-rw-r--r-- net/sctp/protocol.c | 2
-rw-r--r-- net/sctp/socket.c | 49
-rw-r--r-- net/socket.c | 7
-rw-r--r-- net/sunrpc/Kconfig | 28
-rw-r--r-- net/sunrpc/Makefile | 5
-rw-r--r-- net/sunrpc/auth.c | 2
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 8
-rw-r--r-- net/sunrpc/backchannel_rqst.c | 134
-rw-r--r-- net/sunrpc/bc_svc.c | 63
-rw-r--r-- net/sunrpc/clnt.c | 109
-rw-r--r-- net/sunrpc/debugfs.c | 78
-rw-r--r-- net/sunrpc/svc.c | 38
-rw-r--r-- net/sunrpc/xprt.c | 7
-rw-r--r-- net/sunrpc/xprtrdma/Makefile | 14
-rw-r--r-- net/sunrpc/xprtrdma/fmr_ops.c | 120
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c | 229
-rw-r--r-- net/sunrpc/xprtrdma/module.c | 46
-rw-r--r-- net/sunrpc/xprtrdma/physical_ops.c | 14
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 8
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c | 8
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 140
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 6
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 16
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 119
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 56
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 348
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h | 49
-rw-r--r-- net/sunrpc/xprtsock.c | 157
-rw-r--r-- net/switchdev/switchdev.c | 955
-rw-r--r-- net/tipc/addr.c | 7
-rw-r--r-- net/tipc/addr.h | 8
-rw-r--r-- net/tipc/bcast.c | 46
-rw-r--r-- net/tipc/bcast.h | 1
-rw-r--r-- net/tipc/bearer.c | 20
-rw-r--r-- net/tipc/bearer.h | 2
-rw-r--r-- net/tipc/core.c | 4
-rw-r--r-- net/tipc/core.h | 37
-rw-r--r-- net/tipc/link.c | 313
-rw-r--r-- net/tipc/link.h | 60
-rw-r--r-- net/tipc/msg.c | 51
-rw-r--r-- net/tipc/msg.h | 37
-rw-r--r-- net/tipc/name_table.c | 34
-rw-r--r-- net/tipc/net.c | 1
-rw-r--r-- net/tipc/netlink_compat.c | 137
-rw-r--r-- net/tipc/node.c | 3
-rw-r--r-- net/tipc/node.h | 2
-rw-r--r-- net/tipc/server.c | 6
-rw-r--r-- net/tipc/socket.c | 11
-rw-r--r-- net/tipc/subscr.c | 242
-rw-r--r-- net/tipc/subscr.h | 18
-rw-r--r-- net/unix/af_unix.c | 267
-rw-r--r-- net/vmw_vsock/af_vsock.c | 7
-rw-r--r-- net/vmw_vsock/vmci_transport.c | 2
-rw-r--r-- net/wireless/chan.c | 65
-rw-r--r-- net/wireless/core.h | 1
-rw-r--r-- net/wireless/nl80211.c | 7
-rw-r--r-- net/wireless/reg.c | 4
-rw-r--r-- net/wireless/sme.c | 4
-rw-r--r-- net/wireless/sysfs.c | 14
-rw-r--r-- net/wireless/util.c | 5
-rw-r--r-- net/x25/af_x25.c | 8
-rw-r--r-- net/xfrm/xfrm_algo.c | 28
-rw-r--r-- net/xfrm/xfrm_input.c | 12
-rw-r--r-- net/xfrm/xfrm_output.c | 12
-rw-r--r-- net/xfrm/xfrm_policy.c | 42
-rw-r--r-- net/xfrm/xfrm_state.c | 4
-rw-r--r-- net/xfrm/xfrm_user.c | 40
453 files changed, 16530 insertions, 8271 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 59555f0f8fc8..d2cd9de4b724 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -618,6 +618,92 @@ out:
 	return err;
 }
 
+static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	struct sk_buff *p, **pp = NULL;
+	struct vlan_hdr *vhdr;
+	unsigned int hlen, off_vlan;
+	const struct packet_offload *ptype;
+	__be16 type;
+	int flush = 1;
+
+	off_vlan = skb_gro_offset(skb);
+	hlen = off_vlan + sizeof(*vhdr);
+	vhdr = skb_gro_header_fast(skb, off_vlan);
+	if (skb_gro_header_hard(skb, hlen)) {
+		vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
+		if (unlikely(!vhdr))
+			goto out;
+	}
+
+	type = vhdr->h_vlan_encapsulated_proto;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (!ptype)
+		goto out_unlock;
+
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		struct vlan_hdr *vhdr2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
+		if (compare_vlan_header(vhdr, vhdr2))
+			NAPI_GRO_CB(p)->same_flow = 0;
+	}
+
+	skb_gro_pull(skb, sizeof(*vhdr));
+	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
+	__be16 type = vhdr->h_vlan_encapsulated_proto;
+	struct packet_offload *ptype;
+	int err = -ENOENT;
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype)
+		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+
+	rcu_read_unlock();
+	return err;
+}
+
+static struct packet_offload vlan_packet_offloads[] __read_mostly = {
+	{
+		.type = cpu_to_be16(ETH_P_8021Q),
+		.priority = 10,
+		.callbacks = {
+			.gro_receive = vlan_gro_receive,
+			.gro_complete = vlan_gro_complete,
+		},
+	},
+	{
+		.type = cpu_to_be16(ETH_P_8021AD),
+		.priority = 10,
+		.callbacks = {
+			.gro_receive = vlan_gro_receive,
+			.gro_complete = vlan_gro_complete,
+		},
+	},
+};
+
 static int __net_init vlan_init_net(struct net *net)
 {
 	struct vlan_net *vn = net_generic(net, vlan_net_id);
@@ -645,6 +731,7 @@ static struct pernet_operations vlan_net_ops = {
 static int __init vlan_proto_init(void)
 {
 	int err;
+	unsigned int i;
 
 	pr_info("%s v%s\n", vlan_fullname, vlan_version);
 
@@ -668,6 +755,9 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err5;
 
+	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+		dev_add_offload(&vlan_packet_offloads[i]);
+
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
@@ -685,7 +775,13 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
+	unsigned int i;
+
 	vlan_ioctl_set(NULL);
+
+	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+		dev_remove_offload(&vlan_packet_offloads[i]);
+
 	vlan_netlink_fini();
 
 	unregister_netdevice_notifier(&vlan_notifier_block);
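Aside: the vlan.c change above plugs GRO handlers into the generic packet_offload API via dev_add_offload()/dev_remove_offload(). As a reference for how that registration works, here is a minimal self-contained sketch of the same pattern; the EtherType 0x88b5 (IEEE 802 local experimental) and the do-nothing callbacks are illustrative assumptions, not part of the patch:

/* sketch: registering GRO callbacks for a made-up EtherType */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netdevice.h>

static struct sk_buff **example_gro_receive(struct sk_buff **head,
					    struct sk_buff *skb)
{
	/* placeholder: flush immediately, i.e. never aggregate */
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

static int example_gro_complete(struct sk_buff *skb, int nhoff)
{
	return 0;	/* nothing to fix up in the merged skb */
}

static struct packet_offload example_offload __read_mostly = {
	.type = cpu_to_be16(0x88b5),	/* local experimental EtherType */
	.priority = 10,
	.callbacks = {
		.gro_receive = example_gro_receive,
		.gro_complete = example_gro_complete,
	},
};

static int __init example_init(void)
{
	dev_add_offload(&example_offload);	/* as in vlan_proto_init() */
	return 0;
}

static void __exit example_exit(void)
{
	dev_remove_offload(&example_offload);	/* as in vlan_cleanup_module() */
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");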
diff --git a/net/9p/client.c b/net/9p/client.c
index 6f4c4c88db84..498454b3c06c 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -843,7 +843,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
 	if (err < 0) {
 		if (err == -EIO)
 			c->status = Disconnected;
-		goto reterr;
+		if (err != -ERESTARTSYS)
+			goto reterr;
 	}
 	if (req->status == REQ_STATUS_ERROR) {
 		p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
@@ -1582,6 +1583,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
 		p9_free_req(clnt, req);
 		break;
 	}
+	if (rsize < count) {
+		pr_err("bogus RREAD count (%d > %d)\n", count, rsize);
+		count = rsize;
+	}
 
 	p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 	if (!count) {
@@ -1647,6 +1652,11 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
 	if (*err) {
 		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
+		break;
+	}
+	if (rsize < count) {
+		pr_err("bogus RWRITE count (%d > %d)\n", count, rsize);
+		count = rsize;
 	}
 
 	p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
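Both client.c hunks enforce the same sanity rule: a count reported by the server must never exceed the size the client actually requested (rsize); a larger value is logged and clamped rather than trusted. A standalone user-space sketch of that check, with illustrative names:

/* sketch: clamp a server-reported transfer count to the requested size */
#include <stdio.h>

static int clamp_reply_count(int count, int rsize)
{
	if (rsize < count) {
		fprintf(stderr, "bogus reply count (%d > %d)\n", count, rsize);
		count = rsize;	/* never read/write past the request */
	}
	return count;
}

int main(void)
{
	printf("%d\n", clamp_reply_count(8192, 4096));	/* prints 4096 */
	return 0;
}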
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 3533d2a53ab6..37a78d20c0f6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -648,6 +648,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
 	struct rdma_conn_param conn_param;
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device_attr devattr;
+	struct ib_cq_init_attr cq_attr = {};
 
 	/* Parse the transport specific mount options */
 	err = parse_opts(args, &opts);
@@ -705,9 +706,10 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
 		goto error;
 
 	/* Create the Completion Queue */
+	cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
 	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
 				cq_event_handler, client,
-				opts.sq_depth + opts.rq_depth + 1, 0);
+				&cq_attr);
 	if (IS_ERR(rdma->cq))
 		goto error;
 	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
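The trans_rdma.c hunk adapts to an RDMA core API change: ib_create_cq() now takes a struct ib_cq_init_attr rather than a bare cqe count and comp_vector, so new CQ attributes can be added later without touching every caller again. A hedged sketch of a caller using the attribute form (the helper name and its arguments are assumptions for illustration):

/* sketch: creating a completion queue via the attr-struct form */
#include <rdma/ib_verbs.h>

static struct ib_cq *create_cq_for(struct ib_device *dev, void *ctx,
				   ib_comp_handler comp, unsigned int entries)
{
	struct ib_cq_init_attr cq_attr = {};

	cq_attr.cqe = entries;	/* minimum number of CQ entries */
	/* cq_attr.comp_vector and cq_attr.flags stay zero-initialized */
	return ib_create_cq(dev, comp, NULL, ctx, &cq_attr);
}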
diff --git a/net/Kconfig b/net/Kconfig
index 44dd5786ee91..57a7c5af3175 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES
 	  Newly written code should NEVER need this option but do
 	  compat-independent messages instead!
 
+config NET_INGRESS
+	bool
+
 menu "Networking options"
 
 source "net/packet/Kconfig"
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3b7ad43c7dad..d5871ac493eb 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol,
 	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
 		goto out;
 	rc = -ENOMEM;
-	sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto);
+	sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
 	if (!sk)
 		goto out;
 	rc = 0;
diff --git a/net/atm/common.c b/net/atm/common.c
index ed0466637e13..49a872db7e42 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -141,7 +141,7 @@ static struct proto vcc_proto = {
 	.release_cb = vcc_release_cb,
 };
 
-int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
+int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern)
 {
 	struct sock *sk;
 	struct atm_vcc *vcc;
@@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
 	sock->sk = NULL;
 	if (sock->type == SOCK_STREAM)
 		return -EINVAL;
-	sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto);
+	sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 	sock_init_data(sock, sk);
diff --git a/net/atm/common.h b/net/atm/common.h
index 4d6f5b2068ac..959436b87182 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -10,7 +10,7 @@
 #include <linux/poll.h> /* for poll_table */
 
 
-int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
+int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern);
 int vcc_release(struct socket *sock);
 int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index ae0324021407..040207ec399f 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol,
 		return -EAFNOSUPPORT;
 
 	sock->ops = &pvc_proto_ops;
-	return vcc_create(net, sock, protocol, PF_ATMPVC);
+	return vcc_create(net, sock, protocol, PF_ATMPVC, kern);
 }
 
 static const struct net_proto_family pvc_family_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 1ba23f5018e7..3fa0a9ee98d1 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol,
 		return -EAFNOSUPPORT;
 
 	sock->ops = &svc_proto_ops;
-	error = vcc_create(net, sock, protocol, AF_ATMSVC);
+	error = vcc_create(net, sock, protocol, AF_ATMSVC, kern);
 	if (error)
 		return error;
 	ATM_SD(sock)->local.sas_family = AF_ATMSVC;
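The appletalk and ATM hunks above are all one sweep: sk_alloc() gained a kern argument, and each protocol's create handler now forwards the flag it already receives from the socket layer, letting the core distinguish kernel-internal sockets from user-created ones. A sketch of the resulting pattern; PF_EXAMPLE, example_proto, and example_create are hypothetical:

/* sketch: a protocol create handler threading kern through sk_alloc() */
#include <linux/errno.h>
#include <net/sock.h>

#define PF_EXAMPLE 27			/* hypothetical address family */

static struct proto example_proto;	/* assume registered elsewhere */

/* ->create() of a struct net_proto_family; kern is nonzero for sockets
 * the kernel creates for itself (e.g. via sock_create_kern()). */
static int example_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, kern);
	if (!sk)
		return -ENOMEM;
	sock_init_data(sock, sk);
	return 0;
}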
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 330c1f4a5a0b..ae3a47f9d1d5 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -40,7 +40,6 @@
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
-#include <linux/netfilter.h>
 #include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
@@ -58,7 +57,7 @@ static const struct proto_ops ax25_proto_ops;
 
 static void ax25_free_sock(struct sock *sk)
 {
-	ax25_cb_put(ax25_sk(sk));
+	ax25_cb_put(sk_to_ax25(sk));
 }
 
 /*
@@ -307,7 +306,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
 	while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
 		if (skb->sk != ax25->sk) {
 			/* A pending connection */
-			ax25_cb *sax25 = ax25_sk(skb->sk);
+			ax25_cb *sax25 = sk_to_ax25(skb->sk);
 
 			/* Queue the unaccepted socket for death */
 			sock_orphan(skb->sk);
@@ -552,7 +551,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 		return -EFAULT;
 
 	lock_sock(sk);
-	ax25 = ax25_sk(sk);
+	ax25 = sk_to_ax25(sk);
 
 	switch (optname) {
 	case AX25_WINDOW:
@@ -698,7 +697,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname,
 	length = min_t(unsigned int, maxlen, sizeof(int));
 
 	lock_sock(sk);
-	ax25 = ax25_sk(sk);
+	ax25 = sk_to_ax25(sk);
 
 	switch (optname) {
 	case AX25_WINDOW:
@@ -797,7 +796,7 @@ out:
 static struct proto ax25_proto = {
 	.name = "AX25",
 	.owner = THIS_MODULE,
-	.obj_size = sizeof(struct sock),
+	.obj_size = sizeof(struct ax25_sock),
 };
 
 static int ax25_create(struct net *net, struct socket *sock, int protocol,
@@ -855,11 +854,11 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 		return -ESOCKTNOSUPPORT;
 	}
 
-	sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto);
+	sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern);
 	if (sk == NULL)
 		return -ENOMEM;
 
-	ax25 = sk->sk_protinfo = ax25_create_cb();
+	ax25 = ax25_sk(sk)->cb = ax25_create_cb();
 	if (!ax25) {
 		sk_free(sk);
 		return -ENOMEM;
@@ -881,7 +880,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 	struct sock *sk;
 	ax25_cb *ax25, *oax25;
 
-	sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot);
+	sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0);
 	if (sk == NULL)
 		return NULL;
 
@@ -911,7 +910,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 	sk->sk_state = TCP_ESTABLISHED;
 	sock_copy_flags(sk, osk);
 
-	oax25 = ax25_sk(osk);
+	oax25 = sk_to_ax25(osk);
 
 	ax25->modulus = oax25->modulus;
 	ax25->backoff = oax25->backoff;
@@ -939,7 +938,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 		}
 	}
 
-	sk->sk_protinfo = ax25;
+	ax25_sk(sk)->cb = ax25;
 	sk->sk_destruct = ax25_free_sock;
 	ax25->sk = sk;
 
@@ -957,7 +956,7 @@ static int ax25_release(struct socket *sock)
 	sock_hold(sk);
 	sock_orphan(sk);
 	lock_sock(sk);
-	ax25 = ax25_sk(sk);
+	ax25 = sk_to_ax25(sk);
 
 	if (sk->sk_type == SOCK_SEQPACKET) {
 		switch (ax25->state) {
@@ -1067,7 +1066,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	lock_sock(sk);
 
-	ax25 = ax25_sk(sk);
+	ax25 = sk_to_ax25(sk);
 	if (!sock_flag(sk, SOCK_ZAPPED)) {
 		err = -EINVAL;
 		goto out;
@@ -1114,7 +1113,7 @@ static int __must_check ax25_connect(struct socket *sock,
 			struct sockaddr *uaddr, int addr_len, int flags)
 {
 	struct sock *sk = sock->sk;
-	ax25_cb *ax25 = ax25_sk(sk), *ax25t;
+	ax25_cb *ax25 = sk_to_ax25(sk), *ax25t;
 	struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
 	ax25_digi *digi = NULL;
 	int ct = 0, err = 0;
@@ -1395,7 +1394,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	memset(fsa, 0, sizeof(*fsa));
 	lock_sock(sk);
-	ax25 = ax25_sk(sk);
+	ax25 = sk_to_ax25(sk);
 
 	if (peer != 0) {
 		if (sk->sk_state != TCP_ESTABLISHED) {
@@ -1447,7 +1446,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		return -EINVAL;
 
 	lock_sock(sk);
-	ax25 = ax25_sk(sk);
+	ax25 = sk_to_ax25(sk);
 
 	if (sock_flag(sk, SOCK_ZAPPED)) {
 		err = -EADDRNOTAVAIL;
@@ -1622,7 +1621,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	if (skb == NULL)
 		goto out;
 
-	if (!ax25_sk(sk)->pidincl)
+	if (!sk_to_ax25(sk)->pidincl)
 		skb_pull(skb, 1);	/* Remove PID */
 
 	skb_reset_transport_header(skb);
@@ -1763,7 +1762,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
 	case SIOCAX25GETINFO:
 	case SIOCAX25GETINFOOLD: {
-		ax25_cb *ax25 = ax25_sk(sk);
+		ax25_cb *ax25 = sk_to_ax25(sk);
 		struct ax25_info_struct ax25_info;
 
 		ax25_info.t1 = ax25->t1 / HZ;
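The recurring ax25_sk() -> sk_to_ax25() substitutions, the .obj_size change to sizeof(struct ax25_sock), and the ax25_sk(sk)->cb assignments all point at one refactor: the AX.25 control block moves off the generic sk->sk_protinfo pointer into a protocol-private socket structure that embeds struct sock. The header side is outside this net/ diffstat, so the following accessor sketch is an assumption consistent with the call sites above, not a quote of the patch:

/* sketch: a protocol-private socket embedding struct sock */
#include <net/sock.h>

struct ax25_cb;				/* declared in the AX.25 headers */

struct ax25_sock {
	struct sock	sk;		/* must come first: cast-compatible */
	struct ax25_cb	*cb;		/* the per-connection control block */
};

static inline struct ax25_sock *ax25_sk(const struct sock *sk)
{
	return (struct ax25_sock *)sk;	/* valid: .obj_size is the outer struct */
}

static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
{
	return ax25_sk(sk)->cb;		/* matches the usage in the hunks above */
}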
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 7ed8ab724819..bb5a0e4e98d9 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -23,7 +23,6 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <asm/uaccess.h>
@@ -354,7 +353,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 		return 0;
 	}
 
-	ax25 = ax25_sk(make);
+	ax25 = sk_to_ax25(make);
 	skb_set_owner_r(skb, make);
 	skb_queue_head(&sk->sk_receive_queue, skb);
 
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7c646bb2c6f7..b563a3f5f2a8 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -31,7 +31,6 @@
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
-#include <linux/netfilter.h>
 #include <linux/sysctl.h>
 #include <net/ip.h>
 #include <net/arp.h>
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index be2acab9be9d..8ddd41baa81c 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -24,7 +24,6 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
 #include <linux/fcntl.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 71c4badbc807..4ad2fb7bcd35 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -34,7 +34,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
-#include <linux/netfilter.h>
 #include <linux/sysctl.h>
 #include <linux/export.h>
 #include <net/ip.h>
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index eb7d8c0388e4..21434ab79d2c 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
@@ -20,7 +20,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
 batman-adv-y += bat_iv_ogm.o
 batman-adv-y += bitarray.o
 batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
-batman-adv-y += debugfs.o
+batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
 batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
 batman-adv-y += fragmentation.o
 batman-adv-y += gateway_client.o
@@ -29,6 +29,7 @@ batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
 batman-adv-y += icmp_socket.o
 batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
 batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
 batman-adv-y += routing.o
@@ -36,4 +37,3 @@ batman-adv-y += send.o
 batman-adv-y += soft-interface.o
 batman-adv-y += sysfs.o
 batman-adv-y += translation-table.o
-batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 4e49666f8c65..4e59cf3eb079 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 00e00e09b000..753383c2215c 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,20 +15,50 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "bat_algo.h"
 #include "main.h"
-#include "translation-table.h"
+
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include "bitarray.h"
+#include "hard-interface.h"
+#include "hash.h"
+#include "network-coding.h"
 #include "originator.h"
+#include "packet.h"
 #include "routing.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "hard-interface.h"
 #include "send.h"
-#include "bat_algo.h"
-#include "network-coding.h"
+#include "translation-table.h"
 
 /**
  * enum batadv_dup_status - duplicate status
- * @BATADV_NO_DUP: the packet is a duplicate
+ * @BATADV_NO_DUP: the packet is no duplicate
  * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
  *	neighbor)
  * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
@@ -55,7 +85,7 @@ static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
 }
 
 /**
- * batadv_ring_buffer_set - compute the average of all non-zero values stored
+ * batadv_ring_buffer_avg - compute the average of all non-zero values stored
  * in the given ring buffer
  * @lq_recv: pointer to the ring buffer
  *
@@ -64,7 +94,9 @@ static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
 static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
 {
 	const uint8_t *ptr;
-	uint16_t count = 0, i = 0, sum = 0;
+	uint16_t count = 0;
+	uint16_t i = 0;
+	uint16_t sum = 0;
 
 	ptr = lq_recv;
 
@@ -308,7 +340,6 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
 	struct batadv_ogm_packet *batadv_ogm_packet;
 	unsigned char *ogm_buff;
 	uint32_t random_seqno;
-	int res = -ENOMEM;
 
 	/* randomize initial seqno to avoid collision */
 	get_random_bytes(&random_seqno, sizeof(random_seqno));
@@ -317,7 +348,7 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
 	hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
 	ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
 	if (!ogm_buff)
-		goto out;
+		return -ENOMEM;
 
 	hard_iface->bat_iv.ogm_buff = ogm_buff;
 
@@ -329,10 +360,7 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
 	batadv_ogm_packet->reserved = 0;
 	batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
 
-	res = 0;
-
-out:
-	return res;
+	return 0;
 }
 
 static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
@@ -396,8 +424,8 @@ static uint8_t batadv_hop_penalty(uint8_t tq,
 }
 
 /* is there another aggregated packet here? */
-static int batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
-				     __be16 tvlv_len)
+static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
+				      __be16 tvlv_len)
 {
 	int next_buff_pos = 0;
 
@@ -413,7 +441,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
 				     struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-	char *fwd_str;
+	const char *fwd_str;
 	uint8_t packet_num;
 	int16_t buff_pos;
 	struct batadv_ogm_packet *batadv_ogm_packet;
@@ -451,7 +479,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
 			   batadv_ogm_packet->orig,
 			   ntohl(batadv_ogm_packet->seqno),
 			   batadv_ogm_packet->tq, batadv_ogm_packet->ttl,
-			   (batadv_ogm_packet->flags & BATADV_DIRECTLINK ?
+			   ((batadv_ogm_packet->flags & BATADV_DIRECTLINK) ?
 			    "on" : "off"),
 			   hard_iface->net_dev->name,
 			   hard_iface->net_dev->dev_addr);
@@ -548,58 +576,62 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
 	 * - the send time is within our MAX_AGGREGATION_MS time
 	 * - the resulting packet wont be bigger than
 	 *   MAX_AGGREGATION_BYTES
+	 * otherwise aggregation is not possible
 	 */
-	if (time_before(send_time, forw_packet->send_time) &&
-	    time_after_eq(aggregation_end_time, forw_packet->send_time) &&
-	    (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) {
-		/* check aggregation compatibility
-		 * -> direct link packets are broadcasted on
-		 *    their interface only
-		 * -> aggregate packet if the current packet is
-		 *    a "global" packet as well as the base
-		 *    packet
-		 */
-		primary_if = batadv_primary_if_get_selected(bat_priv);
-		if (!primary_if)
-			goto out;
-
-		/* packet is not leaving on the same interface. */
-		if (forw_packet->if_outgoing != if_outgoing)
-			goto out;
+	if (!time_before(send_time, forw_packet->send_time) ||
+	    !time_after_eq(aggregation_end_time, forw_packet->send_time))
+		return false;
+
+	if (aggregated_bytes > BATADV_MAX_AGGREGATION_BYTES)
+		return false;
+
+	/* packet is not leaving on the same interface. */
+	if (forw_packet->if_outgoing != if_outgoing)
+		return false;
+
+	/* check aggregation compatibility
+	 * -> direct link packets are broadcasted on
+	 *    their interface only
+	 * -> aggregate packet if the current packet is
+	 *    a "global" packet as well as the base
+	 *    packet
	 */
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if)
+		return false;
 
-		/* packets without direct link flag and high TTL
-		 * are flooded through the net
-		 */
-		if ((!directlink) &&
-		    (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) &&
-		    (batadv_ogm_packet->ttl != 1) &&
-
-		    /* own packets originating non-primary
-		     * interfaces leave only that interface
-		     */
-		    ((!forw_packet->own) ||
-		     (forw_packet->if_incoming == primary_if))) {
-			res = true;
-			goto out;
-		}
+	/* packets without direct link flag and high TTL
+	 * are flooded through the net
+	 */
+	if (!directlink &&
+	    !(batadv_ogm_packet->flags & BATADV_DIRECTLINK) &&
+	    batadv_ogm_packet->ttl != 1 &&
+
+	    /* own packets originating non-primary
+	     * interfaces leave only that interface
+	     */
+	    (!forw_packet->own ||
+	     forw_packet->if_incoming == primary_if)) {
+		res = true;
+		goto out;
+	}
 
-		/* if the incoming packet is sent via this one
-		 * interface only - we still can aggregate
-		 */
-		if ((directlink) &&
-		    (new_bat_ogm_packet->ttl == 1) &&
-		    (forw_packet->if_incoming == if_incoming) &&
-
-		    /* packets from direct neighbors or
-		     * own secondary interface packets
-		     * (= secondary interface packets in general)
-		     */
-		    (batadv_ogm_packet->flags & BATADV_DIRECTLINK ||
-		     (forw_packet->own &&
-		      forw_packet->if_incoming != primary_if))) {
-			res = true;
-			goto out;
-		}
-	}
+	/* if the incoming packet is sent via this one
	 * interface only - we still can aggregate
	 */
+	if (directlink &&
+	    new_bat_ogm_packet->ttl == 1 &&
+	    forw_packet->if_incoming == if_incoming &&
+
+	    /* packets from direct neighbors or
+	     * own secondary interface packets
+	     * (= secondary interface packets in general)
+	     */
+	    (batadv_ogm_packet->flags & BATADV_DIRECTLINK ||
+	     (forw_packet->own &&
+	      forw_packet->if_incoming != primary_if))) {
+		res = true;
+		goto out;
+	}
 
 out:
@@ -642,19 +674,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
 		if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
 			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 				   "batman packet queue full\n");
-			goto out;
+			goto out_free_outgoing;
 		}
 	}
 
 	forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
-	if (!forw_packet_aggr) {
-		if (!own_packet)
-			atomic_inc(&bat_priv->batman_queue_left);
-		goto out;
-	}
+	if (!forw_packet_aggr)
+		goto out_nomem;
 
-	if ((atomic_read(&bat_priv->aggregated_ogms)) &&
-	    (packet_len < BATADV_MAX_AGGREGATION_BYTES))
+	if (atomic_read(&bat_priv->aggregated_ogms) &&
+	    packet_len < BATADV_MAX_AGGREGATION_BYTES)
 		skb_size = BATADV_MAX_AGGREGATION_BYTES;
 	else
 		skb_size = packet_len;
@@ -662,12 +691,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
 	skb_size += ETH_HLEN;
 
 	forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
-	if (!forw_packet_aggr->skb) {
-		if (!own_packet)
-			atomic_inc(&bat_priv->batman_queue_left);
-		kfree(forw_packet_aggr);
-		goto out;
-	}
+	if (!forw_packet_aggr->skb)
+		goto out_free_forw_packet;
 	forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
 	skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
 
@@ -699,7 +724,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
 			   send_time - jiffies);
 
 	return;
-out:
+out_free_forw_packet:
+	kfree(forw_packet_aggr);
+out_nomem:
+	if (!own_packet)
+		atomic_inc(&bat_priv->batman_queue_left);
+out_free_outgoing:
 	batadv_hardif_free_ref(if_outgoing);
 out_free_incoming:
 	batadv_hardif_free_ref(if_incoming);
@@ -752,13 +782,13 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
 	unsigned long max_aggregation_jiffies;
 
 	batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff;
-	direct_link = batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0;
+	direct_link = !!(batadv_ogm_packet->flags & BATADV_DIRECTLINK);
 	max_aggregation_jiffies = msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS);
 
 	/* find position for the packet in the forward queue */
 	spin_lock_bh(&bat_priv->forw_bat_list_lock);
 	/* own packets are not to be aggregated */
-	if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
+	if (atomic_read(&bat_priv->aggregated_ogms) && !own_packet) {
 		hlist_for_each_entry(forw_packet_pos,
 				     &bat_priv->forw_bat_list, list) {
 			if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet,
@@ -1034,9 +1064,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 		batadv_orig_node_free_ref(orig_tmp);
 		if (!neigh_node)
 			goto unlock;
-	} else
+	} else {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Updating existing last-hop neighbor of originator\n");
+	}
 
 	rcu_read_unlock();
 	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1081,7 +1112,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 	 * won't consider it either
 	 */
 	if (router_ifinfo &&
-	    (neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg)) {
+	    neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) {
 		orig_node_tmp = router->orig_node;
 		spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
 		if_num = router->if_incoming->if_num;
@@ -1356,8 +1387,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 out:
 	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
 	batadv_orig_node_free_ref(orig_node);
-	if (orig_ifinfo)
-		batadv_orig_ifinfo_free_ref(orig_ifinfo);
+	batadv_orig_ifinfo_free_ref(orig_ifinfo);
 	return ret;
 }
 
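The largest bat_iv_ogm.c hunk, in batadv_iv_ogm_can_aggregate(), is a pure control-flow refactor: one deeply nested conditional becomes a chain of guard clauses that reject early with return false, removing an indentation level without changing the predicate. A tiny standalone illustration of that transformation, with made-up values:

/* sketch: nested conditional vs. equivalent guard clauses */
#include <stdbool.h>

/* Before: the whole body lives inside one big if. */
static bool can_merge_nested(int used, int extra, int limit)
{
	bool res = false;

	if (used + extra <= limit && extra > 0) {
		if (used != 0)
			res = true;
	}
	return res;
}

/* After: each precondition rejects early; the body stays flat. */
static bool can_merge_guarded(int used, int extra, int limit)
{
	if (used + extra > limit || extra <= 0)
		return false;

	return used != 0;
}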
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index e3da07a64026..cf68c328345e 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -15,10 +15,10 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "bitarray.h"
+#include "main.h"
 
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 
 /* shift the packet array by n places. */
 static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n)
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 2acaafe60188..0c2456225fae 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -18,6 +18,12 @@
 #ifndef _NET_BATMAN_ADV_BITARRAY_H_
 #define _NET_BATMAN_ADV_BITARRAY_H_
 
+#include "main.h"
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
 /* Returns 1 if the corresponding bit in the given seq_bits indicates true
  * and curr_seqno is within range of last_seqno. Otherwise returns 0.
  */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ac4b96eccade..ba0609292ae7 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -15,19 +15,41 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
-#include "hash.h"
-#include "hard-interface.h"
-#include "originator.h"
 #include "bridge_loop_avoidance.h"
-#include "translation-table.h"
-#include "send.h"
+#include "main.h"
 
-#include <linux/etherdevice.h>
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
 #include <linux/crc16.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
 #include <linux/if_arp.h>
-#include <net/arp.h>
+#include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+#include <net/arp.h>
+
+#include "hard-interface.h"
+#include "hash.h"
+#include "originator.h"
+#include "packet.h"
+#include "translation-table.h"
 
 static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
 
@@ -42,12 +64,8 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
 	struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
 	uint32_t hash = 0;
 
-	hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
-	hash = batadv_hash_bytes(hash, &claim->vid, sizeof(claim->vid));
-
-	hash += (hash << 3);
-	hash ^= (hash >> 11);
-	hash += (hash << 15);
+	hash = jhash(&claim->addr, sizeof(claim->addr), hash);
+	hash = jhash(&claim->vid, sizeof(claim->vid), hash);
 
 	return hash % size;
 }
@@ -59,12 +77,8 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data,
 	const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
 	uint32_t hash = 0;
 
-	hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
-	hash = batadv_hash_bytes(hash, &claim->vid, sizeof(claim->vid));
-
-	hash += (hash << 3);
-	hash ^= (hash >> 11);
-	hash += (hash << 15);
+	hash = jhash(&claim->addr, sizeof(claim->addr), hash);
+	hash = jhash(&claim->vid, sizeof(claim->vid), hash);
 
 	return hash % size;
 }
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 43c985d92c3e..0282690389ac 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -18,6 +18,16 @@
 #ifndef _NET_BATMAN_ADV_BLA_H_
 #define _NET_BATMAN_ADV_BLA_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_hard_iface;
+struct batadv_orig_node;
+struct batadv_priv;
+struct seq_file;
+struct sk_buff;
+
 #ifdef CONFIG_BATMAN_ADV_BLA
 int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		  unsigned short vid, bool is_bcast);
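Each reworked header declares the struct names it needs instead of pulling in their full definitions; a forward declaration is enough wherever only a pointer to the type crosses the prototypes, and it keeps rebuilds cheaper. The idiom in miniature, with hypothetical names, compilable as-is:

/* sketch: a forward declaration suffices when only pointers cross the API */
struct bar;				/* incomplete type */

static int foo_is_null(const struct bar *b)
{
	return b == 0;			/* never dereferenced, so this compiles */
}

int main(void)
{
	return foo_is_null((struct bar *)0);
}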
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index a4972874c056..c4c1e8030ba0 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -15,21 +15,42 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "debugfs.h"
 #include "main.h"
 
+#include <linux/compiler.h>
 #include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/printk.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <stdarg.h>
 
-#include "debugfs.h"
-#include "translation-table.h"
-#include "originator.h"
-#include "hard-interface.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "soft-interface.h"
-#include "icmp_socket.h"
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "icmp_socket.h"
 #include "network-coding.h"
+#include "originator.h"
+#include "translation-table.h"
 
 static struct dentry *batadv_debugfs;
 
@@ -482,11 +503,7 @@ rem_attr:
 	debugfs_remove_recursive(hard_iface->debug_dir);
 	hard_iface->debug_dir = NULL;
 out:
-#ifdef CONFIG_DEBUG_FS
 	return -ENOMEM;
-#else
-	return 0;
-#endif /* CONFIG_DEBUG_FS */
 }
 
 /**
@@ -541,11 +558,7 @@ rem_attr:
 	debugfs_remove_recursive(bat_priv->debug_dir);
 	bat_priv->debug_dir = NULL;
 out:
-#ifdef CONFIG_DEBUG_FS
 	return -ENOMEM;
-#else
-	return 0;
-#endif /* CONFIG_DEBUG_FS */
 }
 
 void batadv_debugfs_del_meshif(struct net_device *dev)
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 37c4d6ddd04d..187acdc85dfa 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -18,8 +18,17 @@
 #ifndef _NET_BATMAN_ADV_DEBUGFS_H_
 #define _NET_BATMAN_ADV_DEBUGFS_H_
 
+#include "main.h"
+
+#include <linux/kconfig.h>
+
+struct batadv_hard_iface;
+struct net_device;
+
 #define BATADV_DEBUGFS_SUBDIR "batman_adv"
 
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
 void batadv_debugfs_init(void);
 void batadv_debugfs_destroy(void);
 int batadv_debugfs_add_meshif(struct net_device *dev);
@@ -27,4 +36,36 @@ void batadv_debugfs_del_meshif(struct net_device *dev);
 int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
 void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
 
+#else
+
+static inline void batadv_debugfs_init(void)
+{
+}
+
+static inline void batadv_debugfs_destroy(void)
+{
+}
+
+static inline int batadv_debugfs_add_meshif(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline void batadv_debugfs_del_meshif(struct net_device *dev)
+{
+}
+
+static inline
+int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
+{
+	return 0;
+}
+
+static inline
+void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
+{
+}
+
+#endif
+
 #endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */
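Moving the CONFIG_DEBUG_FS fallback into the header is the usual kernel stub pattern: callers stay unconditional, the no-op inline bodies compile away when the option is off, and the #ifdef blocks vanish from every .c file (which is why the two debugfs.c hunks above can return -ENOMEM unconditionally). A generic, hedged rendition with a userspace stand-in for the kconfig helper:

#include <stdio.h>

#define IS_ENABLED(x) (x)		/* stand-in for the kconfig macro */
#define CONFIG_FEATURE 0		/* pretend the option is compiled out */

#if IS_ENABLED(CONFIG_FEATURE)
void feature_init(void);		/* real implementation lives elsewhere */
#else
static inline void feature_init(void)
{
	/* no-op stub: callers need no #ifdef of their own */
}
#endif

int main(void)
{
	feature_init();			/* compiles and links either way */
	printf("done\n");
	return 0;
}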
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index aad022dd15df..fb54e6aed096 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
@@ -15,18 +15,36 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/if_ether.h>
+#include "distributed-arp-table.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
 #include <linux/if_arp.h>
+#include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
 #include <net/arp.h>
 
-#include "main.h"
-#include "hash.h"
-#include "distributed-arp-table.h"
 #include "hard-interface.h"
+#include "hash.h"
 #include "originator.h"
 #include "send.h"
-#include "types.h"
 #include "translation-table.h"
 
 static void batadv_dat_purge(struct work_struct *work);
@@ -206,9 +224,22 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size)
 {
 	uint32_t hash = 0;
 	const struct batadv_dat_entry *dat = data;
+	const unsigned char *key;
+	uint32_t i;
 
-	hash = batadv_hash_bytes(hash, &dat->ip, sizeof(dat->ip));
-	hash = batadv_hash_bytes(hash, &dat->vid, sizeof(dat->vid));
+	key = (const unsigned char *)&dat->ip;
+	for (i = 0; i < sizeof(dat->ip); i++) {
+		hash += key[i];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+
+	key = (const unsigned char *)&dat->vid;
+	for (i = 0; i < sizeof(dat->vid); i++) {
+		hash += key[i];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
 
 	hash += (hash << 3);
 	hash ^= (hash >> 11);
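With batadv_hash_bytes() removed from hash.h (see that file's diff below), batadv_hash_dat() open-codes the same one-at-a-time loop once per field. Hashing ip and vid individually rather than the whole struct also keeps compiler-inserted padding bytes, whose contents are indeterminate, out of the hash. A self-contained illustration of the field-by-field approach:

#include <stdint.h>
#include <stdio.h>

struct dat_key {
	uint32_t ip;	/* 4 bytes */
	uint16_t vid;	/* 2 bytes; the struct is often padded to 8 */
};

static uint32_t oat(uint32_t hash, const void *data, uint32_t size)
{
	const unsigned char *key = data;
	uint32_t i;

	for (i = 0; i < size; i++) {
		hash += key[i];
		hash += (hash << 10);
		hash ^= (hash >> 6);
	}
	return hash;
}

int main(void)
{
	struct dat_key k = { .ip = 0x0a000001, .vid = 7 };
	uint32_t hash = 0;

	/* field by field: padding between/after members never enters the hash */
	hash = oat(hash, &k.ip, sizeof(k.ip));
	hash = oat(hash, &k.vid, sizeof(k.vid));

	printf("%u bytes of struct, %zu hashed\n",
	       (unsigned)sizeof(k), sizeof(k.ip) + sizeof(k.vid));
	printf("hash = %u\n", hash);
	return 0;
}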
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 2fe0764c64be..3181507ebc14 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
@@ -18,12 +18,19 @@
 #ifndef _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_
 #define _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_
 
-#ifdef CONFIG_BATMAN_ADV_DAT
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
 
-#include "types.h"
 #include "originator.h"
+#include "packet.h"
 
-#include <linux/if_arp.h>
+struct seq_file;
+struct sk_buff;
+
+#ifdef CONFIG_BATMAN_ADV_DAT
 
 /* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */
 #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 3d1dcaa3e8b5..c0f0d01ab244 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
@@ -15,12 +15,28 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "fragmentation.h"
-#include "send.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include "hard-interface.h"
 #include "originator.h"
+#include "packet.h"
 #include "routing.h"
-#include "hard-interface.h"
+#include "send.h"
 #include "soft-interface.h"
 
 /**
@@ -161,6 +177,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
 		hlist_add_head(&frag_entry_new->list, &chain->head);
 		chain->size = skb->len - hdr_size;
 		chain->timestamp = jiffies;
+		chain->total_size = ntohs(frag_packet->total_size);
 		ret = true;
 		goto out;
 	}
@@ -195,9 +212,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
 
 out:
 	if (chain->size > batadv_frag_size_limit() ||
-	    ntohs(frag_packet->total_size) > batadv_frag_size_limit()) {
+	    chain->total_size != ntohs(frag_packet->total_size) ||
+	    chain->total_size > batadv_frag_size_limit()) {
 		/* Clear chain if total size of either the list or the packet
-		 * exceeds the maximum size of one merged packet.
+		 * exceeds the maximum size of one merged packet. Don't allow
+		 * packets to have different total_size.
 		 */
 		batadv_frag_clear_chain(&chain->head);
 		chain->size = 0;
@@ -228,19 +247,13 @@ err:
  * Returns the merged skb or NULL on error.
  */
 static struct sk_buff *
-batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
+batadv_frag_merge_packets(struct hlist_head *chain)
 {
 	struct batadv_frag_packet *packet;
 	struct batadv_frag_list_entry *entry;
 	struct sk_buff *skb_out = NULL;
 	int size, hdr_size = sizeof(struct batadv_frag_packet);
 
-	/* Make sure incoming skb has non-bogus data. */
-	packet = (struct batadv_frag_packet *)skb->data;
-	size = ntohs(packet->total_size);
-	if (size > batadv_frag_size_limit())
-		goto free;
-
 	/* Remove first entry, as this is the destination for the rest of the
 	 * fragments.
 	 */
@@ -249,6 +262,9 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
 	skb_out = entry->skb;
 	kfree(entry);
 
+	packet = (struct batadv_frag_packet *)skb_out->data;
+	size = ntohs(packet->total_size);
+
 	/* Make room for the rest of the fragments. */
 	if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
 		kfree_skb(skb_out);
@@ -304,7 +320,7 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
 	if (hlist_empty(&head))
 		goto out;
 
-	skb_out = batadv_frag_merge_packets(&head);
+	skb_out = batadv_frag_merge_packets(&head);
 	if (!skb_out)
 		goto out_err;
 
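The reassembly fix records total_size when a chain is created and throws the chain away if any later fragment disagrees, so a forged fragment can no longer smuggle a different total into the merge step; merge then reads the size from the chain's own head fragment instead of whichever skb arrived last. A userspace sketch of that bookkeeping, names hypothetical:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FRAG_SIZE_LIMIT 1400

struct frag_chain {
	unsigned int size;	/* bytes buffered so far */
	uint16_t total_size;	/* claimed size of the merged packet */
	bool in_use;
};

/* sketch of the insert-time checks added by the patch */
static bool chain_insert(struct frag_chain *c, uint16_t claimed, unsigned int len)
{
	if (!c->in_use) {
		c->in_use = true;
		c->size = len;
		c->total_size = claimed;	/* remembered at creation */
		return true;
	}

	c->size += len;

	if (c->size > FRAG_SIZE_LIMIT ||
	    c->total_size != claimed ||		/* all fragments must agree */
	    c->total_size > FRAG_SIZE_LIMIT) {
		c->in_use = false;		/* drop the whole chain */
		c->size = 0;
		return false;
	}
	return true;
}

int main(void)
{
	struct frag_chain c = { 0 };

	chain_insert(&c, 1000, 600);
	/* second fragment claims a different total: chain is cleared */
	printf("accepted: %d\n", chain_insert(&c, 1200, 600));
	return 0;
}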
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index d848cf6676a2..8b9877e70b95 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
@@ -18,6 +18,15 @@
 #ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_
 #define _NET_BATMAN_ADV_FRAGMENTATION_H_
 
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct sk_buff;
+
 void batadv_frag_purge_orig(struct batadv_orig_node *orig,
 			    bool (*check_cb)(struct batadv_frag_table_entry *));
 bool batadv_frag_skb_fwd(struct sk_buff *skb,
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 090828cf1fa7..bb0158620628 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -15,18 +15,38 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
-#include "sysfs.h"
 #include "gateway_client.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/udp.h>
+
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "originator.h"
-#include "translation-table.h"
+#include "packet.h"
 #include "routing.h"
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/udp.h>
-#include <linux/if_vlan.h>
+#include "sysfs.h"
+#include "translation-table.h"
 
 /* These are the offsets of the "hw type" and "hw address length" in the dhcp
  * packet starting at the beginning of the dhcp header
@@ -733,11 +753,6 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
 	if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr)))
 		return BATADV_DHCP_NO;
 
-	/* skb->data might have been reallocated by pskb_may_pull() */
-	ethhdr = eth_hdr(skb);
-	if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
-		ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
-
 	udphdr = (struct udphdr *)(skb->data + *header_len);
 	*header_len += sizeof(*udphdr);
 
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 7ee53bb7d50f..89565b451c18 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -18,6 +18,14 @@
 #ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
 #define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_tvlv_gateway_data;
+struct seq_file;
+struct sk_buff;
+
 void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
 void batadv_gw_reselect(struct batadv_priv *bat_priv);
 void batadv_gw_election(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 88a1bc3804d1..39cf44ccebd4 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -15,9 +15,18 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "gateway_common.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+
 #include "gateway_client.h"
+#include "packet.h"
 
 /**
  * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index aa5116561947..bd5c812cebf4 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -18,6 +18,13 @@
 #ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_
 #define _NET_BATMAN_ADV_GATEWAY_COMMON_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_priv;
+struct net_device;
+
 enum batadv_gw_modes {
 	BATADV_GW_MODE_OFF,
 	BATADV_GW_MODE_CLIENT,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index baf1f9843f2c..f4a15d2e5eaf 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,22 +15,36 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
-#include "distributed-arp-table.h"
 #include "hard-interface.h"
-#include "soft-interface.h"
-#include "send.h"
-#include "translation-table.h"
-#include "routing.h"
-#include "sysfs.h"
-#include "debugfs.h"
-#include "originator.h"
-#include "hash.h"
-#include "bridge_loop_avoidance.h"
-#include "gateway_client.h"
+#include "main.h"
 
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
+#include <linux/if.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+
+#include "bridge_loop_avoidance.h"
+#include "debugfs.h"
+#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "soft-interface.h"
+#include "sysfs.h"
+#include "translation-table.h"
 
 void batadv_hardif_free_rcu(struct rcu_head *rcu)
 {
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 1918cd50b62e..5a31420513e1 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -18,6 +18,17 @@
 #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_
 #define _NET_BATMAN_ADV_HARD_INTERFACE_H_
 
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct net_device;
+
 enum batadv_hard_if_state {
 	BATADV_IF_NOT_IN_USE,
 	BATADV_IF_TO_BE_REMOVED,
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 7c1c63080e20..e89f3146b092 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -15,8 +15,12 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "hash.h"
+#include "main.h"
+
+#include <linux/fs.h>
+#include <linux/lockdep.h>
+#include <linux/slab.h>
 
 /* clears the hash */
 static void batadv_hash_init(struct batadv_hashtable *hash)
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 539fc1266793..5065f50c9c3c 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -18,7 +18,16 @@
 #ifndef _NET_BATMAN_ADV_HASH_H_
 #define _NET_BATMAN_ADV_HASH_H_
 
+#include "main.h"
+
+#include <linux/compiler.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct lock_class_key;
 
 /* callback to a compare function. should compare 2 element datas for their
  * keys, return 0 if same and not 0 if not same
@@ -80,28 +89,6 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
 }
 
 /**
- * batadv_hash_bytes - hash some bytes and add them to the previous hash
- * @hash: previous hash value
- * @data: data to be hashed
- * @size: number of bytes to be hashed
- *
- * Returns the new hash value.
- */
-static inline uint32_t batadv_hash_bytes(uint32_t hash, const void *data,
-					 uint32_t size)
-{
-	const unsigned char *key = data;
-	int i;
-
-	for (i = 0; i < size; i++) {
-		hash += key[i];
-		hash += (hash << 10);
-		hash ^= (hash >> 6);
-	}
-	return hash;
-}
-
-/**
  * batadv_hash_add - adds data to the hashtable
  * @hash: storage hash table
  * @compare: callback to determine if 2 hash elements are identical
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 161ef8f17d2e..07061bcbaa04 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -15,14 +15,39 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "icmp_socket.h"
 #include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
 #include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/poll.h>
+#include <linux/printk.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/skbuff.h>
 #include <linux/slab.h>
-#include "icmp_socket.h"
-#include "send.h"
-#include "hash.h"
-#include "originator.h"
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+
 #include "hard-interface.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
 
 static struct batadv_socket_client *batadv_socket_client_hash[256];
 
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 0c33950aa4aa..7de7fce4b48c 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -18,6 +18,13 @@
 #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
 #define _NET_BATMAN_ADV_ICMP_SOCKET_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_icmp_header;
+struct batadv_priv;
+
 #define BATADV_ICMP_SOCKET "socket"
 
 void batadv_socket_init(void);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 12fc77bef23f..8457097f1643 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,31 +15,53 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
 #include <linux/crc32c.h>
-#include <linux/highmem.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
 #include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
 #include <net/dsfield.h>
-#include "main.h"
-#include "sysfs.h"
+#include <net/rtnetlink.h>
+
+#include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
 #include "debugfs.h"
+#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
+#include "hard-interface.h"
+#include "icmp_socket.h"
+#include "multicast.h"
+#include "network-coding.h"
+#include "originator.h"
+#include "packet.h"
 #include "routing.h"
 #include "send.h"
-#include "originator.h"
 #include "soft-interface.h"
-#include "icmp_socket.h"
 #include "translation-table.h"
-#include "hard-interface.h"
-#include "gateway_client.h"
-#include "bridge_loop_avoidance.h"
-#include "distributed-arp-table.h"
-#include "multicast.h"
-#include "gateway_common.h"
-#include "hash.h"
-#include "bat_algo.h"
-#include "network-coding.h"
-#include "fragmentation.h"
 
 /* List manipulations on hardif_list have to be rtnl_lock()'ed,
  * list traversals just rcu-locked
@@ -209,10 +231,13 @@ void batadv_mesh_free(struct net_device *soft_iface)
  * interfaces in the current mesh
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: the address to check
+ *
+ * Returns 'true' if the mac address was found, false otherwise.
  */
-int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
+bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
 {
 	const struct batadv_hard_iface *hard_iface;
+	bool is_my_mac = false;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -223,12 +248,12 @@ int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
 			continue;
 
 		if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) {
-			rcu_read_unlock();
-			return 1;
+			is_my_mac = true;
+			break;
 		}
 	}
 	rcu_read_unlock();
-	return 0;
+	return is_my_mac;
 }
 
 /**
@@ -510,14 +535,12 @@ static struct batadv_algo_ops *batadv_algo_get(char *name)
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
 {
 	struct batadv_algo_ops *bat_algo_ops_tmp;
-	int ret;
 
 	bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
 	if (bat_algo_ops_tmp) {
 		pr_info("Trying to register already registered routing algorithm: %s\n",
 			bat_algo_ops->name);
-		ret = -EEXIST;
-		goto out;
+		return -EEXIST;
 	}
 
 	/* all algorithms must implement all ops (for now) */
@@ -531,32 +554,26 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
 	    !bat_algo_ops->bat_neigh_is_equiv_or_better) {
 		pr_info("Routing algo '%s' does not implement required ops\n",
 			bat_algo_ops->name);
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	INIT_HLIST_NODE(&bat_algo_ops->list);
 	hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
-	ret = 0;
 
-out:
-	return ret;
+	return 0;
 }
 
 int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
 {
 	struct batadv_algo_ops *bat_algo_ops;
-	int ret = -EINVAL;
 
 	bat_algo_ops = batadv_algo_get(name);
 	if (!bat_algo_ops)
-		goto out;
+		return -EINVAL;
 
 	bat_priv->bat_algo_ops = bat_algo_ops;
-	ret = 0;
 
-out:
-	return ret;
+	return 0;
 }
 
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
@@ -819,15 +836,15 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
 	new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
 
 	/* keep old buffer if kmalloc should fail */
-	if (new_buff) {
-		memcpy(new_buff, *packet_buff, min_packet_len);
-		kfree(*packet_buff);
-		*packet_buff = new_buff;
-		*packet_buff_len = min_packet_len + additional_packet_len;
-		return true;
-	}
+	if (!new_buff)
+		return false;
+
+	memcpy(new_buff, *packet_buff, min_packet_len);
+	kfree(*packet_buff);
+	*packet_buff = new_buff;
+	*packet_buff_len = min_packet_len + additional_packet_len;
 
-	return false;
+	return true;
 }
 
 /**
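The batadv_is_my_mac() rewrite is the standard single-exit shape for locked loops: record the verdict, break, and unlock in exactly one place, rather than duplicating the unlock inside the loop body. The same shape reduced to a runnable userspace analogue, with a plain mutex standing in for rcu_read_lock():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static const char *addrs[] = { "aa:01", "aa:02", "aa:03" };

static bool is_my_addr(const char *addr)
{
	bool found = false;
	size_t i;

	pthread_mutex_lock(&list_lock);
	for (i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++) {
		if (strcmp(addrs[i], addr) == 0) {
			found = true;
			break;		/* no early return under the lock */
		}
	}
	pthread_mutex_unlock(&list_lock);	/* single unlock site */

	return found;
}

int main(void)
{
	printf("%d %d\n", is_my_addr("aa:02"), is_my_addr("bb:01"));
	return 0;
}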
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 4d2318829a34..41d27c7872b9 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2015.0"
+#define BATADV_SOURCE_VERSION "2015.1"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -44,7 +44,7 @@
 #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
 #define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */
 #define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */
-#define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */
+#define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */
 /* sliding packet range of received originator messages in sequence numbers
  * (should be a multiple of our word size)
  */
@@ -163,28 +163,26 @@ enum batadv_uev_type {
 
 /* Kernel headers */
 
-#include <linux/mutex.h> /* mutex */
-#include <linux/module.h> /* needed by all modules */
-#include <linux/netdevice.h> /* netdevice */
-#include <linux/etherdevice.h> /* ethernet address classification */
-#include <linux/if_ether.h> /* ethernet header */
-#include <linux/poll.h> /* poll_table */
-#include <linux/kthread.h> /* kernel threads */
-#include <linux/pkt_sched.h> /* schedule types */
-#include <linux/workqueue.h> /* workqueue */
+#include <linux/atomic.h>
+#include <linux/bitops.h> /* for packet.h */
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h> /* for packet.h */
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/types.h>
 #include <linux/percpu.h>
-#include <linux/slab.h>
-#include <net/sock.h> /* struct sock */
-#include <net/addrconf.h> /* ipv6 address stuff */
-#include <linux/ip.h>
-#include <net/rtnetlink.h>
 #include <linux/jiffies.h>
-#include <linux/seq_file.h>
 #include <linux/if_vlan.h>
 
 #include "types.h"
 
-#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \
+struct batadv_ogm_packet;
+struct seq_file;
+struct sk_buff;
+
+#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
 			       (int)(vid & VLAN_VID_MASK) : -1)
 
 extern char batadv_routing_algo[];
@@ -195,7 +193,7 @@ extern struct workqueue_struct *batadv_event_workqueue;
 
 int batadv_mesh_init(struct net_device *soft_iface);
 void batadv_mesh_free(struct net_device *soft_iface);
-int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
+bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
 struct batadv_hard_iface *
 batadv_seq_print_text_primary_if_get(struct seq_file *seq);
 int batadv_max_header_len(void);
@@ -279,7 +277,7 @@ static inline void _batadv_dbg(int type __always_unused,
  *
  * note: can't use ether_addr_equal() as it requires aligned memory
  */
-static inline int batadv_compare_eth(const void *data1, const void *data2)
+static inline bool batadv_compare_eth(const void *data1, const void *data2)
 {
 	return ether_addr_equal_unaligned(data1, data2);
 }
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index b24e4bb64fb5..7aa480b7edd0 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
@@ -15,10 +15,33 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "multicast.h"
-#include "originator.h"
-#include "hard-interface.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+#include "packet.h"
 #include "translation-table.h"
 
 /**
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3a44ebdb43cb..beb6e56c624a 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
@@ -18,6 +18,12 @@
 #ifndef _NET_BATMAN_ADV_MULTICAST_H_
 #define _NET_BATMAN_ADV_MULTICAST_H_
 
+#include "main.h"
+
+struct batadv_orig_node;
+struct batadv_priv;
+struct sk_buff;
+
 /**
  * batadv_forw_mode - the way a packet should be forwarded as
  * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 127cc4d7380a..f0a50f31d822 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
@@ -15,15 +15,44 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "network-coding.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
 #include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/init.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
 
-#include "main.h"
-#include "hash.h"
-#include "network-coding.h"
-#include "send.h"
+#include "hard-interface.h"
+#include "hash.h"
 #include "originator.h"
-#include "hard-interface.h"
+#include "packet.h"
 #include "routing.h"
+#include "send.h"
 
 static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
 static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
@@ -155,7 +184,7 @@ err:
  */
 void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
 {
-	atomic_set(&bat_priv->network_coding, 1);
+	atomic_set(&bat_priv->network_coding, 0);
 	bat_priv->nc.min_tq = 200;
 	bat_priv->nc.max_fwd_delay = 10;
 	bat_priv->nc.max_buffer_time = 200;
@@ -275,7 +304,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
 	 * max_buffer time
 	 */
 	return batadv_has_timed_out(nc_path->last_valid,
-				    bat_priv->nc.max_buffer_time*10);
+				    bat_priv->nc.max_buffer_time * 10);
}
 
 /**
@@ -453,14 +482,8 @@ static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
 	const struct batadv_nc_path *nc_path = data;
 	uint32_t hash = 0;
 
-	hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
-				 sizeof(nc_path->prev_hop));
-	hash = batadv_hash_bytes(hash, &nc_path->next_hop,
-				 sizeof(nc_path->next_hop));
-
-	hash += (hash << 3);
-	hash ^= (hash >> 11);
-	hash += (hash << 15);
+	hash = jhash(&nc_path->prev_hop, sizeof(nc_path->prev_hop), hash);
+	hash = jhash(&nc_path->next_hop, sizeof(nc_path->next_hop), hash);
 
 	return hash % size;
 }
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 358c0d686ab0..5b79aa8c64c1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
@@ -18,6 +18,19 @@
 #ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_
 #define _NET_BATMAN_ADV_NETWORK_CODING_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_nc_node;
+struct batadv_neigh_node;
+struct batadv_ogm_packet;
+struct batadv_orig_node;
+struct batadv_priv;
+struct net_device;
+struct seq_file;
+struct sk_buff;
+
 #ifdef CONFIG_BATMAN_ADV_NC
 
 void batadv_nc_status_update(struct net_device *net_dev);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 90e805aba379..018b7495ad84 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,19 +15,31 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "originator.h"
 #include "main.h"
+
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
 #include "distributed-arp-table.h"
-#include "originator.h"
-#include "hash.h"
-#include "translation-table.h"
-#include "routing.h"
+#include "fragmentation.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
-#include "soft-interface.h"
-#include "bridge_loop_avoidance.h"
-#include "network-coding.h"
-#include "fragmentation.h"
+#include "hash.h"
 #include "multicast.h"
+#include "network-coding.h"
+#include "routing.h"
+#include "translation-table.h"
 
 /* hash class keys */
 static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -197,13 +209,19 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
 	struct hlist_node *node_tmp;
 	struct batadv_neigh_node *neigh_node;
 	struct batadv_neigh_ifinfo *neigh_ifinfo;
+	struct batadv_algo_ops *bao;
 
 	neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
+	bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
 
 	hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
 				  &neigh_node->ifinfo_list, list) {
 		batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
 	}
+
+	if (bao->bat_neigh_free)
+		bao->bat_neigh_free(neigh_node);
+
 	batadv_hardif_free_ref_now(neigh_node->if_incoming);
 
 	kfree(neigh_node);
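batadv_neigh_node_free_rcu() now gives the routing algorithm a chance to release per-neighbor state through an optional bat_neigh_free hook; the call is guarded, so algorithms that allocate nothing simply leave the pointer NULL. The guarded-callback idiom in miniature, with a hypothetical ops struct:

#include <stdio.h>

struct neigh_node { int id; };

struct algo_ops {
	const char *name;
	/* optional hook: may be NULL when the algorithm keeps no state */
	void (*neigh_free)(struct neigh_node *neigh);
};

static void my_neigh_free(struct neigh_node *neigh)
{
	printf("algo releasing state for neigh %d\n", neigh->id);
}

static void neigh_node_free(const struct algo_ops *bao, struct neigh_node *n)
{
	if (bao->neigh_free)		/* guard: the hook is optional */
		bao->neigh_free(n);
	/* ...followed by the generic teardown... */
}

int main(void)
{
	struct neigh_node n = { .id = 7 };
	struct algo_ops with = { "iv", my_neigh_free };
	struct algo_ops without = { "v", 0 };

	neigh_node_free(&with, &n);
	neigh_node_free(&without, &n);	/* safely does nothing extra */
	return 0;
}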
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index aa4a43696295..79734d302010 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -18,8 +18,21 @@
 #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
 #define _NET_BATMAN_ADV_ORIGINATOR_H_
 
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/if_ether.h>
+#include <linux/jhash.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
 #include "hash.h"
 
+struct seq_file;
+
 int batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
 void batadv_originator_free(struct batadv_priv *bat_priv);
@@ -75,20 +88,9 @@ void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan);
  */
 static inline uint32_t batadv_choose_orig(const void *data, uint32_t size)
 {
-	const unsigned char *key = data;
 	uint32_t hash = 0;
-	size_t i;
-
-	for (i = 0; i < 6; i++) {
-		hash += key[i];
-		hash += (hash << 10);
-		hash ^= (hash >> 6);
-	}
-
-	hash += (hash << 3);
-	hash ^= (hash >> 11);
-	hash += (hash << 15);
 
+	hash = jhash(data, ETH_ALEN, hash);
 	return hash % size;
 }
 
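[Note: the batadv_choose_orig() hunk above replaces an open-coded Jenkins
one-at-a-time hash with the kernel's jhash() helper from <linux/jhash.h>,
which is why that header joins the include list. A minimal sketch of the same
bucket selection in isolation (choose_bucket is a hypothetical name; the seed
0 and ETH_ALEN length match the new code):

	#include <linux/if_ether.h>	/* ETH_ALEN */
	#include <linux/jhash.h>	/* jhash() */

	static inline uint32_t choose_bucket(const void *mac, uint32_t size)
	{
		/* hash the 6-byte MAC address into one of 'size' buckets */
		return jhash(mac, ETH_ALEN, 0) % size;
	}
]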
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index b81fbbf21a63..9e747c08d0bc 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -18,6 +18,9 @@
 #ifndef _NET_BATMAN_ADV_PACKET_H_
 #define _NET_BATMAN_ADV_PACKET_H_
 
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
 /**
  * enum batadv_packettype - types for batman-adv encapsulated packets
  * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index da83982bf974..c360c0cd19c2 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,20 +15,36 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "routing.h"
-#include "send.h"
-#include "soft-interface.h"
-#include "hard-interface.h"
-#include "icmp_socket.h"
-#include "translation-table.h"
-#include "originator.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+
+#include "bitarray.h"
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
-#include "network-coding.h"
 #include "fragmentation.h"
-
-#include <linux/if_vlan.h>
+#include "hard-interface.h"
+#include "icmp_socket.h"
+#include "network-coding.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "soft-interface.h"
+#include "translation-table.h"
 
 static int batadv_route_unicast_packet(struct sk_buff *skb,
 				       struct batadv_hard_iface *recv_if);
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 557d3d12a9ab..6bc29d33abc1 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -18,6 +18,16 @@
 #ifndef _NET_BATMAN_ADV_ROUTING_H_
 #define _NET_BATMAN_ADV_ROUTING_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_hard_iface;
+struct batadv_neigh_node;
+struct batadv_orig_node;
+struct batadv_priv;
+struct sk_buff;
+
 bool batadv_check_management_packet(struct sk_buff *skb,
 				    struct batadv_hard_iface *hard_iface,
 				    int header_len);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3d64ed20c393..0a01992e65ab 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,19 +15,37 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "send.h"
 #include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/if.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/workqueue.h>
+
 #include "distributed-arp-table.h"
-#include "send.h"
-#include "routing.h"
-#include "translation-table.h"
-#include "soft-interface.h"
-#include "hard-interface.h"
-#include "gateway_common.h"
+#include "fragmentation.h"
 #include "gateway_client.h"
-#include "originator.h"
+#include "hard-interface.h"
 #include "network-coding.h"
-#include "fragmentation.h"
-#include "multicast.h"
+#include "originator.h"
+#include "routing.h"
+#include "soft-interface.h"
+#include "translation-table.h"
 
 static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
 
@@ -255,8 +273,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 			    struct batadv_orig_node *orig_node,
 			    unsigned short vid)
 {
-	struct ethhdr *ethhdr;
 	struct batadv_unicast_packet *unicast_packet;
+	struct ethhdr *ethhdr;
 	int ret = NET_XMIT_DROP;
 
 	if (!orig_node)
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 38d0ec1833ae..0536835fe503 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -18,6 +18,19 @@
 #ifndef _NET_BATMAN_ADV_SEND_H_
 #define _NET_BATMAN_ADV_SEND_H_
 
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "packet.h"
+
+struct batadv_hard_iface;
+struct batadv_orig_node;
+struct batadv_priv;
+struct sk_buff;
+struct work_struct;
+
 int batadv_send_skb_packet(struct sk_buff *skb,
 			   struct batadv_hard_iface *hard_iface,
 			   const uint8_t *dst_addr);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 5ec31d7de24f..c002961da75d 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -15,26 +15,50 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "soft-interface.h"
-#include "hard-interface.h"
-#include "distributed-arp-table.h"
-#include "routing.h"
-#include "send.h"
-#include "debugfs.h"
-#include "translation-table.h"
-#include "hash.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "sysfs.h"
-#include "originator.h"
-#include <linux/slab.h>
-#include <linux/ethtool.h>
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
 #include <linux/if_vlan.h>
-#include "multicast.h"
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
 #include "bridge_loop_avoidance.h"
+#include "debugfs.h"
+#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
+#include "hard-interface.h"
+#include "multicast.h"
 #include "network-coding.h"
+#include "packet.h"
+#include "send.h"
+#include "sysfs.h"
+#include "translation-table.h"
 
 static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
 static void batadv_get_drvinfo(struct net_device *dev,
@@ -105,6 +129,7 @@ static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
 static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
+	struct batadv_softif_vlan *vlan;
 	struct sockaddr *addr = p;
 	uint8_t old_addr[ETH_ALEN];
 
@@ -115,12 +140,17 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 	ether_addr_copy(dev->dev_addr, addr->sa_data);
 
 	/* only modify transtable if it has been initialized before */
-	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) {
-		batadv_tt_local_remove(bat_priv, old_addr, BATADV_NO_FLAGS,
+	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+		return 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+		batadv_tt_local_remove(bat_priv, old_addr, vlan->vid,
 				       "mac address changed", false);
-		batadv_tt_local_add(dev, addr->sa_data, BATADV_NO_FLAGS,
+		batadv_tt_local_add(dev, addr->sa_data, vlan->vid,
 				    BATADV_NULL_IFINDEX, BATADV_NO_MARK);
 	}
+	rcu_read_unlock();
 
 	return 0;
 }
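[Note: once VLANs run on top of the soft interface, the interface MAC owns one
local TT entry per VLAN, so the rewritten batadv_interface_set_mac_addr()
walks bat_priv->softif_vlan_list instead of updating only the untagged entry.
The list is traversed under rcu_read_lock(), the usual pattern for readers of
an RCU-protected hlist; a condensed sketch (use_vlan is a hypothetical
stand-in for the per-VLAN work):

	rcu_read_lock();
	hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list)
		use_vlan(vlan->vid);	/* act on each VLAN id */
	rcu_read_unlock();
]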
@@ -732,7 +762,7 @@ static int batadv_softif_init_late(struct net_device *dev)
 	atomic_set(&bat_priv->aggregated_ogms, 1);
 	atomic_set(&bat_priv->bonding, 0);
 #ifdef CONFIG_BATMAN_ADV_BLA
-	atomic_set(&bat_priv->bridge_loop_avoidance, 0);
+	atomic_set(&bat_priv->bridge_loop_avoidance, 1);
 #endif
 #ifdef CONFIG_BATMAN_ADV_DAT
 	atomic_set(&bat_priv->distributed_arp_table, 1);
@@ -818,7 +848,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
 	int ret = -EINVAL;
 
 	hard_iface = batadv_hardif_get_by_netdev(slave_dev);
-	if (!hard_iface || hard_iface->soft_iface != NULL)
+	if (!hard_iface || hard_iface->soft_iface)
 		goto out;
 
 	ret = batadv_hardif_enable_interface(hard_iface, dev->name);
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index dbab22fd89a5..578e8a663c30 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -18,6 +18,17 @@
 #ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_
 #define _NET_BATMAN_ADV_SOFT_INTERFACE_H_
 
+#include "main.h"
+
+#include <net/rtnetlink.h>
+
+struct batadv_hard_iface;
+struct batadv_orig_node;
+struct batadv_priv;
+struct batadv_softif_vlan;
+struct net_device;
+struct sk_buff;
+
 int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
 void batadv_interface_rx(struct net_device *soft_iface,
 			 struct sk_buff *skb, struct batadv_hard_iface *recv_if,
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index a75dc12f96f8..d6a312a82c03 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -15,16 +15,35 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "sysfs.h"
-#include "translation-table.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+
 #include "distributed-arp-table.h"
-#include "network-coding.h"
-#include "originator.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
+#include "network-coding.h"
+#include "packet.h"
 #include "soft-interface.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
 
 static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
 {
@@ -151,7 +170,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
 	static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
 			   batadv_store_##_name)
 
-#define BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func) \
+#define BATADV_ATTR_SIF_STORE_UINT(_name, _var, _min, _max, _post_func) \
 ssize_t batadv_store_##_name(struct kobject *kobj, \
 			     struct attribute *attr, char *buff, \
 			     size_t count) \
@@ -161,24 +180,24 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
 	\
 	return __batadv_store_uint_attr(buff, count, _min, _max, \
 					_post_func, attr, \
-					&bat_priv->_name, net_dev); \
+					&bat_priv->_var, net_dev); \
 }
 
-#define BATADV_ATTR_SIF_SHOW_UINT(_name) \
+#define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \
 ssize_t batadv_show_##_name(struct kobject *kobj, \
 			    struct attribute *attr, char *buff) \
 { \
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \
 	\
-	return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \
+	return sprintf(buff, "%i\n", atomic_read(&bat_priv->_var)); \
 } \
 
 /* Use this, if you are going to set [name] in the soft-interface
  * (bat_priv) to an unsigned integer value
 */
-#define BATADV_ATTR_SIF_UINT(_name, _mode, _min, _max, _post_func) \
-	static BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func)\
-	static BATADV_ATTR_SIF_SHOW_UINT(_name) \
+#define BATADV_ATTR_SIF_UINT(_name, _var, _mode, _min, _max, _post_func)\
+	static BATADV_ATTR_SIF_STORE_UINT(_name, _var, _min, _max, _post_func)\
+	static BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \
 	static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
 			   batadv_store_##_name)
 
@@ -540,19 +559,20 @@ BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
 static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL);
 static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode,
 		   batadv_store_gw_mode);
-BATADV_ATTR_SIF_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER,
-		     INT_MAX, NULL);
-BATADV_ATTR_SIF_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE,
-		     NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE,
-		     batadv_post_gw_reselect);
+BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
+		     2 * BATADV_JITTER, INT_MAX, NULL);
+BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
+		     BATADV_TQ_MAX_VALUE, NULL);
+BATADV_ATTR_SIF_UINT(gw_sel_class, gw_sel_class, S_IRUGO | S_IWUSR, 1,
+		     BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
 static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
 		   batadv_store_gw_bwidth);
 #ifdef CONFIG_BATMAN_ADV_MCAST
 BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL);
 #endif
 #ifdef CONFIG_BATMAN_ADV_DEBUG
-BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
+BATADV_ATTR_SIF_UINT(log_level, log_level, S_IRUGO | S_IWUSR, 0,
+		     BATADV_DBG_ALL, NULL);
 #endif
 #ifdef CONFIG_BATMAN_ADV_NC
 BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR,
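[Note: BATADV_ATTR_SIF_UINT used to assume that the sysfs file name and the
atomic_t member in struct batadv_priv were spelled identically; the new _var
parameter decouples the two, which is why every caller above now passes the
name twice. A hedged sketch of what the split makes possible (the names
my_knob and my_knob_store are hypothetical, not attributes of this patch):

	/* sysfs file "my_knob", backed by atomic_t my_knob_store
	 * in struct batadv_priv
	 */
	BATADV_ATTR_SIF_UINT(my_knob, my_knob_store, S_IRUGO | S_IWUSR,
			     0, 255, NULL);
]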
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index b715b60db7cd..2294583f7cf9 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -18,6 +18,16 @@
 #ifndef _NET_BATMAN_ADV_SYSFS_H_
 #define _NET_BATMAN_ADV_SYSFS_H_
 
+#include "main.h"
+
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+struct batadv_priv;
+struct batadv_softif_vlan;
+struct kobject;
+struct net_device;
+
 #define BATADV_SYSFS_IF_MESH_SUBDIR "mesh"
 #define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv"
 /**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 07b263a437d1..b4824951010b 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
@@ -15,18 +15,41 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
 #include "translation-table.h"
-#include "soft-interface.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/crc32c.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+
+#include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
-#include "send.h"
 #include "hash.h"
-#include "originator.h"
-#include "routing.h"
-#include "bridge_loop_avoidance.h"
 #include "multicast.h"
-
-#include <linux/crc32c.h>
+#include "originator.h"
+#include "packet.h"
+#include "soft-interface.h"
 
 /* hash class keys */
 static struct lock_class_key batadv_tt_local_hash_lock_class_key;
@@ -67,12 +90,8 @@ static inline uint32_t batadv_choose_tt(const void *data, uint32_t size)
 	uint32_t hash = 0;
 
 	tt = (struct batadv_tt_common_entry *)data;
-	hash = batadv_hash_bytes(hash, &tt->addr, ETH_ALEN);
-	hash = batadv_hash_bytes(hash, &tt->vid, sizeof(tt->vid));
-
-	hash += (hash << 3);
-	hash ^= (hash >> 11);
-	hash += (hash << 15);
+	hash = jhash(&tt->addr, ETH_ALEN, hash);
+	hash = jhash(&tt->vid, sizeof(tt->vid), hash);
 
 	return hash % size;
 }
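[Note: batadv_choose_tt() now chains two jhash() calls, feeding the result of
the first in as the initval of the second, so that both the MAC address and
the VLAN id of the TT entry select the bucket; the helper replaces the old
batadv_hash_bytes() plus final-shuffle combination. The chaining idiom in
general form (e is a hypothetical struct with addr and vid fields):

	uint32_t h = 0;

	h = jhash(&e->addr, ETH_ALEN, h);	/* mix in the MAC first */
	h = jhash(&e->vid, sizeof(e->vid), h);	/* then fold in the VLAN id */
	return h % size;
]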
@@ -954,17 +973,17 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 		   " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
 		   tt_common_entry->addr,
 		   BATADV_PRINT_VID(tt_common_entry->vid),
-		   (tt_common_entry->flags &
-		    BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
+		   ((tt_common_entry->flags &
+		     BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
 		   no_purge ? 'P' : '.',
-		   (tt_common_entry->flags &
-		    BATADV_TT_CLIENT_NEW ? 'N' : '.'),
-		   (tt_common_entry->flags &
-		    BATADV_TT_CLIENT_PENDING ? 'X' : '.'),
-		   (tt_common_entry->flags &
-		    BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
-		   (tt_common_entry->flags &
-		    BATADV_TT_CLIENT_ISOLA ? 'I' : '.'),
+		   ((tt_common_entry->flags &
+		     BATADV_TT_CLIENT_NEW) ? 'N' : '.'),
+		   ((tt_common_entry->flags &
+		     BATADV_TT_CLIENT_PENDING) ? 'X' : '.'),
+		   ((tt_common_entry->flags &
+		     BATADV_TT_CLIENT_WIFI) ? 'W' : '.'),
+		   ((tt_common_entry->flags &
+		     BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
 		   no_purge ? 0 : last_seen_secs,
 		   no_purge ? 0 : last_seen_msecs,
 		   vlan->tt.crc);
@@ -1528,10 +1547,10 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
 		   BATADV_PRINT_VID(tt_global_entry->common.vid),
 		   best_entry->ttvn, best_entry->orig_node->orig,
 		   last_ttvn, vlan->tt.crc,
-		   (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
-		   (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
-		   (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'),
-		   (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+		   ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
+		   ((flags & BATADV_TT_CLIENT_WIFI) ? 'W' : '.'),
+		   ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
+		   ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
 
 		batadv_orig_node_vlan_free_ref(vlan);
 	}
@@ -1560,10 +1579,10 @@ print_list:
 		   BATADV_PRINT_VID(tt_global_entry->common.vid),
 		   orig_entry->ttvn, orig_entry->orig_node->orig,
 		   last_ttvn, vlan->tt.crc,
-		   (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
-		   (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
-		   (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'),
-		   (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+		   ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
+		   ((flags & BATADV_TT_CLIENT_WIFI) ? 'W' : '.'),
+		   ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
+		   ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
 
 		batadv_orig_node_vlan_free_ref(vlan);
 	}
@@ -2529,7 +2548,7 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
 	batadv_dbg(BATADV_DBG_TT, bat_priv,
 		   "Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n",
 		   req_src, tt_data->ttvn, req_dst,
-		   (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+		   ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.'));
 
 	/* Let's get the orig node of the REAL destination */
 	req_dst_orig_node = batadv_orig_hash_find(bat_priv, req_dst);
@@ -2660,7 +2679,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
 	batadv_dbg(BATADV_DBG_TT, bat_priv,
 		   "Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n",
 		   req_src, tt_data->ttvn,
-		   (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+		   ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.'));
 
 	spin_lock_bh(&bat_priv->tt.commit_lock);
 
@@ -2899,7 +2918,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
 	batadv_dbg(BATADV_DBG_TT, bat_priv,
 		   "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
 		   resp_src, tt_data->ttvn, num_entries,
-		   (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+		   ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.'));
 
 	orig_node = batadv_orig_hash_find(bat_priv, resp_src);
 	if (!orig_node)
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index ad84d7b89e39..6acc25d3a925 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
@@ -18,6 +18,15 @@
 #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_orig_node;
+struct batadv_priv;
+struct net_device;
+struct seq_file;
+
 int batadv_tt_init(struct batadv_priv *bat_priv);
 bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 			 unsigned short vid, int ifindex, uint32_t mark);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 9398c3fb4174..67d63483618e 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -18,9 +18,23 @@
 #ifndef _NET_BATMAN_ADV_TYPES_H_
 #define _NET_BATMAN_ADV_TYPES_H_
 
+#ifndef _NET_BATMAN_ADV_MAIN_H_
+#error only "main.h" can be included directly
+#endif
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
 #include "packet.h"
-#include "bitarray.h"
-#include <linux/kernel.h>
+
+struct seq_file;
 
 #ifdef CONFIG_BATMAN_ADV_DAT
 
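[Note: types.h now refuses to be included directly: unless main.h (which
defines _NET_BATMAN_ADV_MAIN_H_ and sets up the definitions types.h depends
on) has already been processed, the preprocessor aborts the build with a
clear message. The general pattern, with gate.h/GATE_H as hypothetical names:

	#ifndef GATE_H			/* defined by the gating header */
	#error only "gate.h" can be included directly
	#endif
]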
@@ -132,6 +146,7 @@ struct batadv_orig_ifinfo {
  * @timestamp: time (jiffie) of last received fragment
  * @seqno: sequence number of the fragments in the list
  * @size: accumulated size of packets in list
+ * @total_size: expected size of the assembled packet
  */
 struct batadv_frag_table_entry {
 	struct hlist_head head;
@@ -139,6 +154,7 @@ struct batadv_frag_table_entry {
 	unsigned long timestamp;
 	uint16_t seqno;
 	uint16_t size;
+	uint16_t total_size;
 };
 
 /**
@@ -181,9 +197,10 @@ struct batadv_orig_node_vlan {
 
 /**
  * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
- * @bcast_own: bitfield containing the number of our OGMs this orig_node
- * rebroadcasted "back" to us (relative to last_real_seqno)
- * @bcast_own_sum: counted result of bcast_own
+ * @bcast_own: set of bitfields (one per hard interface) where each one counts
+ * the number of our OGMs this orig_node rebroadcasted "back" to us (relative
+ * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
+ * @bcast_own_sum: sum of bcast_own
  * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
 * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
  */
@@ -1118,6 +1135,8 @@ struct batadv_forw_packet {
  * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better
  * than neigh2 for their respective outgoing interface from the metric
  * prospective
+ * @bat_neigh_free: free the resources allocated by the routing algorithm for a
+ * neigh_node object
  * @bat_orig_print: print the originator table (optional)
  * @bat_orig_free: free the resources allocated by the routing algorithm for an
  * orig_node object
@@ -1135,6 +1154,7 @@ struct batadv_algo_ops {
 	void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
 	void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
 	void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
+	/* neigh_node handling API */
 	int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
 			     struct batadv_hard_iface *if_outgoing1,
 			     struct batadv_neigh_node *neigh2,
@@ -1144,6 +1164,7 @@ struct batadv_algo_ops {
 				struct batadv_hard_iface *if_outgoing1,
 				struct batadv_neigh_node *neigh2,
 				struct batadv_hard_iface *if_outgoing2);
+	void (*bat_neigh_free)(struct batadv_neigh_node *neigh);
 	/* orig_node handling API */
 	void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
 			       struct batadv_hard_iface *hard_iface);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 1742b849fcff..2fb7b3064904 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -192,7 +192,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
 		if (ipv6_addr_any(nexthop))
 			return NULL;
 	} else {
-		nexthop = rt6_nexthop(rt);
+		nexthop = rt6_nexthop(rt, daddr);
 
 		/* We need to remember the address because it is needed
 		 * by bt_xmit() when sending the packet. In bt_xmit(), the
@@ -856,7 +856,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
 	set_dev_addr(netdev, &chan->src, chan->src_type);
 
 	netdev->netdev_ops = &netdev_ops;
-	SET_NETDEV_DEV(netdev, &chan->conn->hcon->dev);
+	SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
 	SET_NETDEV_DEVTYPE(netdev, &bt_type);
 
 	err = register_netdev(netdev);
@@ -928,7 +928,7 @@ static void delete_netdev(struct work_struct *work)
 
 	unregister_netdev(entry->netdev);
 
-	/* The entry pointer is deleted in device_event() */
+	/* The entry pointer is deleted by the netdev destructor. */
 }
 
 static void chan_close_cb(struct l2cap_chan *chan)
@@ -937,7 +937,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
 	struct lowpan_dev *dev = NULL;
 	struct lowpan_peer *peer;
 	int err = -ENOENT;
-	bool last = false, removed = true;
+	bool last = false, remove = true;
 
 	BT_DBG("chan %p conn %p", chan, chan->conn);
 
@@ -948,7 +948,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
 		/* If conn is set, then the netdev is also there and we should
 		 * not remove it.
 		 */
-		removed = false;
+		remove = false;
 	}
 
 	spin_lock(&devices_lock);
@@ -977,7 +977,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
 
 		ifdown(dev->netdev);
 
-		if (!removed) {
+		if (remove) {
 			INIT_WORK(&entry->delete_netdev, delete_netdev);
 			schedule_work(&entry->delete_netdev);
 		}
@@ -1208,8 +1208,6 @@ static void disconnect_all_peers(void)
 
 		list_del_rcu(&peer->list);
 		kfree_rcu(peer, rcu);
-
-		module_put(THIS_MODULE);
 	}
 	spin_unlock(&devices_lock);
 }
@@ -1418,7 +1416,6 @@ static int device_event(struct notifier_block *unused,
 			BT_DBG("Unregistered netdev %s %p",
 			       netdev->name, netdev);
 			list_del(&entry->list);
-			kfree(entry);
 			break;
 		}
 	}
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 9a8ea232d28f..29c12ae72a66 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -12,9 +12,10 @@ obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o
 bluetooth_6lowpan-y := 6lowpan.o
 
 bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
-	hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \
+	hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
 	a2mp.o amp.o ecc.o hci_request.o mgmt_util.o
 
+bluetooth-$(CONFIG_BT_BREDR) += sco.o
 bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
 bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
 
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index bde2bdd9e929..b5116fa9835e 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
 	if (sock->type != SOCK_RAW)
 		return -ESOCKTNOSUPPORT;
 
-	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto);
+	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 
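[Note: sk_alloc() gained a fifth parameter in the v4.2 cycle; 'kern' is the
flag a net_proto_family ->create() handler already receives and tells the
core whether the socket is being created by the kernel itself rather than by
userspace, which matters for namespace refcounting and LSM labeling. The
Bluetooth protocols are converted mechanically, here and in the cmtp, hidp
and HCI hunks below. A sketch of the updated call site inside a ->create()
hook (my_sock_create and my_proto are hypothetical):

	static int my_sock_create(struct net *net, struct socket *sock,
				  int protocol, int kern)
	{
		struct sock *sk;

		/* pass 'kern' through so the core can tell kernel-internal
		 * sockets from userspace ones
		 */
		sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &my_proto, kern);
		if (!sk)
			return -ENOMEM;
		return 0;
	}
]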
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index d82787d417bd..ce86a7bae844 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
 	if (sock->type != SOCK_RAW)
 		return -ESOCKTNOSUPPORT;
 
-	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto);
+	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ee5e59839b02..2c48bf0b5afb 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -276,7 +276,7 @@ u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
 }
 
 void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
-		      __u8 ltk[16])
+		      __u8 ltk[16], __u8 key_size)
 {
 	struct hci_dev *hdev = conn->hdev;
 	struct hci_cp_le_start_enc cp;
@@ -288,7 +288,7 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
 	cp.handle = cpu_to_le16(conn->handle);
 	cp.rand = rand;
 	cp.ediv = ediv;
-	memcpy(cp.ltk, ltk, sizeof(cp.ltk));
+	memcpy(cp.ltk, ltk, key_size);
 
 	hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp);
 }
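[Note: the LE Start Encryption command always carries a fixed 16-byte LTK
field, but a negotiated key may legitimately be shorter; copying only
key_size bytes stops the command from picking up whatever trailed the short
key in the caller's buffer. This pairs with the ltk->enc_size change in the
hci_event.c hunk further down, where the unused tail of the field is
explicitly zeroed. The copy-only-what-was-negotiated idiom, assuming cp was
zeroed beforehand (the existing function body memsets cp before filling it):

	memset(&cp, 0, sizeof(cp));		/* whole 16-byte LTK field = 0 */
	memcpy(cp.ltk, ltk, key_size);		/* only the negotiated bytes */
	hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp);
]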
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c4802f3bd4c5..2f8fb33067e1 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -94,7 +94,6 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
 	char buf[32];
 	size_t buf_size = min(count, (sizeof(buf)-1));
 	bool enable;
-	int err;
 
 	if (!test_bit(HCI_UP, &hdev->flags))
 		return -ENETDOWN;
@@ -121,12 +120,8 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
-	err = -bt_to_errno(skb->data[0]);
 	kfree_skb(skb);
 
-	if (err < 0)
-		return err;
-
 	hci_dev_change_flag(hdev, HCI_DUT_MODE);
 
 	return count;
@@ -1558,6 +1553,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	BT_DBG("%s %p", hdev->name, hdev);
 
 	if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 	    test_bit(HCI_UP, &hdev->flags)) {
 		/* Execute vendor specific shutdown routine */
 		if (hdev->shutdown)
@@ -1595,6 +1591,11 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		cancel_delayed_work_sync(&hdev->rpa_expired);
 
+	if (hdev->adv_instance_timeout) {
+		cancel_delayed_work_sync(&hdev->adv_instance_expire);
+		hdev->adv_instance_timeout = 0;
+	}
+
 	/* Avoid potential lockdep warnings from the *_flush() calls by
 	 * ensuring the workqueue is empty up front.
 	 */
@@ -2151,6 +2152,17 @@ static void hci_discov_off(struct work_struct *work)
 	mgmt_discoverable_timeout(hdev);
 }
 
+static void hci_adv_timeout_expire(struct work_struct *work)
+{
+	struct hci_dev *hdev;
+
+	hdev = container_of(work, struct hci_dev, adv_instance_expire.work);
+
+	BT_DBG("%s", hdev->name);
+
+	mgmt_adv_timeout_expired(hdev);
+}
+
 void hci_uuids_clear(struct hci_dev *hdev)
 {
 	struct bt_uuid *uuid, *tmp;
@@ -2614,6 +2626,130 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
 	return 0;
 }
 
+/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance)
+{
+	struct adv_info *adv_instance;
+
+	list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
+		if (adv_instance->instance == instance)
+			return adv_instance;
+	}
+
+	return NULL;
+}
+
+/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) {
+	struct adv_info *cur_instance;
+
+	cur_instance = hci_find_adv_instance(hdev, instance);
+	if (!cur_instance)
+		return NULL;
+
+	if (cur_instance == list_last_entry(&hdev->adv_instances,
+					    struct adv_info, list))
+		return list_first_entry(&hdev->adv_instances,
+					struct adv_info, list);
+	else
+		return list_next_entry(cur_instance, list);
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance)
+{
+	struct adv_info *adv_instance;
+
+	adv_instance = hci_find_adv_instance(hdev, instance);
+	if (!adv_instance)
+		return -ENOENT;
+
+	BT_DBG("%s removing %dMR", hdev->name, instance);
+
+	if (hdev->cur_adv_instance == instance && hdev->adv_instance_timeout) {
+		cancel_delayed_work(&hdev->adv_instance_expire);
+		hdev->adv_instance_timeout = 0;
+	}
+
+	list_del(&adv_instance->list);
+	kfree(adv_instance);
+
+	hdev->adv_instance_cnt--;
+
+	return 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+void hci_adv_instances_clear(struct hci_dev *hdev)
+{
+	struct adv_info *adv_instance, *n;
+
+	if (hdev->adv_instance_timeout) {
+		cancel_delayed_work(&hdev->adv_instance_expire);
+		hdev->adv_instance_timeout = 0;
+	}
+
+	list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
+		list_del(&adv_instance->list);
+		kfree(adv_instance);
+	}
+
+	hdev->adv_instance_cnt = 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
+			 u16 adv_data_len, u8 *adv_data,
+			 u16 scan_rsp_len, u8 *scan_rsp_data,
+			 u16 timeout, u16 duration)
+{
+	struct adv_info *adv_instance;
+
+	adv_instance = hci_find_adv_instance(hdev, instance);
+	if (adv_instance) {
+		memset(adv_instance->adv_data, 0,
+		       sizeof(adv_instance->adv_data));
+		memset(adv_instance->scan_rsp_data, 0,
+		       sizeof(adv_instance->scan_rsp_data));
+	} else {
+		if (hdev->adv_instance_cnt >= HCI_MAX_ADV_INSTANCES ||
+		    instance < 1 || instance > HCI_MAX_ADV_INSTANCES)
+			return -EOVERFLOW;
+
+		adv_instance = kzalloc(sizeof(*adv_instance), GFP_KERNEL);
+		if (!adv_instance)
+			return -ENOMEM;
+
+		adv_instance->pending = true;
+		adv_instance->instance = instance;
+		list_add(&adv_instance->list, &hdev->adv_instances);
+		hdev->adv_instance_cnt++;
+	}
+
+	adv_instance->flags = flags;
+	adv_instance->adv_data_len = adv_data_len;
+	adv_instance->scan_rsp_len = scan_rsp_len;
+
+	if (adv_data_len)
+		memcpy(adv_instance->adv_data, adv_data, adv_data_len);
+
+	if (scan_rsp_len)
+		memcpy(adv_instance->scan_rsp_data,
+		       scan_rsp_data, scan_rsp_len);
+
+	adv_instance->timeout = timeout;
+	adv_instance->remaining_time = timeout;
+
+	if (duration == 0)
+		adv_instance->duration = HCI_DEFAULT_ADV_DURATION;
+	else
+		adv_instance->duration = duration;
+
+	BT_DBG("%s for %dMR", hdev->name, instance);
+
+	return 0;
+}
+
 struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
 					   bdaddr_t *bdaddr, u8 type)
 {
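[Note: the block above replaces the single static adv_info slot with a list
of advertising instances hanging off hci_dev, all manipulated under
hdev->lock. A hedged usage sketch of the new helpers; the instance number
0x01 and the 3-byte payload (an AD "Flags" field) are illustrative values,
not taken from the patch:

	u8 adv_data[] = { 0x02, 0x01, 0x06 };	/* AD: flags, LE general disc. */
	int err;

	hci_dev_lock(hdev);

	/* instance 1: no flags, no scan response, no timeout; duration 0
	 * selects HCI_DEFAULT_ADV_DURATION
	 */
	err = hci_add_adv_instance(hdev, 0x01, 0, sizeof(adv_data), adv_data,
				   0, NULL, 0, 0);
	if (!err)
		err = hci_remove_adv_instance(hdev, 0x01);

	hci_dev_unlock(hdev);
]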
@@ -3019,6 +3155,9 @@ struct hci_dev *hci_alloc_dev(void)
 	hdev->manufacturer = 0xffff;	/* Default to internal use */
 	hdev->inq_tx_power = HCI_TX_POWER_INVALID;
 	hdev->adv_tx_power = HCI_TX_POWER_INVALID;
+	hdev->adv_instance_cnt = 0;
+	hdev->cur_adv_instance = 0x00;
+	hdev->adv_instance_timeout = 0;
 
 	hdev->sniff_max_interval = 800;
 	hdev->sniff_min_interval = 80;
@@ -3060,6 +3199,7 @@ struct hci_dev *hci_alloc_dev(void)
 	INIT_LIST_HEAD(&hdev->pend_le_conns);
 	INIT_LIST_HEAD(&hdev->pend_le_reports);
 	INIT_LIST_HEAD(&hdev->conn_hash.list);
+	INIT_LIST_HEAD(&hdev->adv_instances);
 
 	INIT_WORK(&hdev->rx_work, hci_rx_work);
 	INIT_WORK(&hdev->cmd_work, hci_cmd_work);
@@ -3071,6 +3211,7 @@ struct hci_dev *hci_alloc_dev(void)
 	INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);
 	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
 	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
+	INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire);
 
 	skb_queue_head_init(&hdev->rx_q);
 	skb_queue_head_init(&hdev->cmd_q);
@@ -3082,7 +3223,6 @@ struct hci_dev *hci_alloc_dev(void)
 
 	hci_init_sysfs(hdev);
 	discovery_init(hdev);
-	adv_info_init(hdev);
 
 	return hdev;
 }
@@ -3253,6 +3393,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	hci_smp_ltks_clear(hdev);
 	hci_smp_irks_clear(hdev);
 	hci_remote_oob_data_clear(hdev);
+	hci_adv_instances_clear(hdev);
 	hci_bdaddr_list_clear(&hdev->le_white_list);
 	hci_conn_params_clear_all(hdev);
 	hci_discovery_filter_clear(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 7b61be73650f..32363c2b7f83 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2603,6 +2603,63 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
+static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
+				       u16 opcode, struct sk_buff *skb)
+{
+	const struct hci_rp_read_enc_key_size *rp;
+	struct hci_conn *conn;
+	u16 handle;
+
+	BT_DBG("%s status 0x%02x", hdev->name, status);
+
+	if (!skb || skb->len < sizeof(*rp)) {
+		BT_ERR("%s invalid HCI Read Encryption Key Size response",
+		       hdev->name);
+		return;
+	}
+
+	rp = (void *)skb->data;
+	handle = le16_to_cpu(rp->handle);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (!conn)
+		goto unlock;
+
+	/* If we fail to read the encryption key size, assume maximum
+	 * (which is the same we do also when this HCI command isn't
+	 * supported.
+	 */
+	if (rp->status) {
+		BT_ERR("%s failed to read key size for handle %u", hdev->name,
+		       handle);
+		conn->enc_key_size = HCI_LINK_KEY_SIZE;
+	} else {
+		conn->enc_key_size = rp->key_size;
+	}
+
+	if (conn->state == BT_CONFIG) {
+		conn->state = BT_CONNECTED;
+		hci_connect_cfm(conn, 0);
+		hci_conn_drop(conn);
+	} else {
+		u8 encrypt;
+
+		if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+			encrypt = 0x00;
+		else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
+			encrypt = 0x02;
+		else
+			encrypt = 0x01;
+
+		hci_encrypt_cfm(conn, 0, encrypt);
+	}
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
 static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_encrypt_change *ev = (void *) skb->data;
@@ -2650,22 +2707,51 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		goto unlock;
 	}
 
-	if (conn->state == BT_CONFIG) {
-		if (!ev->status)
-			conn->state = BT_CONNECTED;
+	/* In Secure Connections Only mode, do not allow any connections
+	 * that are not encrypted with AES-CCM using a P-256 authenticated
+	 * combination key.
+	 */
+	if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
+	    (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
+	     conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
+		hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
+		hci_conn_drop(conn);
+		goto unlock;
+	}
+
+	/* Try reading the encryption key size for encrypted ACL links */
+	if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
+		struct hci_cp_read_enc_key_size cp;
+		struct hci_request req;
 
-		/* In Secure Connections Only mode, do not allow any
-		 * connections that are not encrypted with AES-CCM
-		 * using a P-256 authenticated combination key.
+		/* Only send HCI_Read_Encryption_Key_Size if the
+		 * controller really supports it. If it doesn't, assume
+		 * the default size (16).
 		 */
-		if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
-		    (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
-		     conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
-			hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
-			hci_conn_drop(conn);
-			goto unlock;
+		if (!(hdev->commands[20] & 0x10)) {
+			conn->enc_key_size = HCI_LINK_KEY_SIZE;
+			goto notify;
 		}
 
+		hci_req_init(&req, hdev);
+
+		cp.handle = cpu_to_le16(conn->handle);
+		hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
+
+		if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
+			BT_ERR("Sending HCI Read Encryption Key Size failed");
+			conn->enc_key_size = HCI_LINK_KEY_SIZE;
+			goto notify;
+		}
+
+		goto unlock;
+	}
+
+notify:
+	if (conn->state == BT_CONFIG) {
+		if (!ev->status)
+			conn->state = BT_CONNECTED;
+
 		hci_connect_cfm(conn, ev->status);
 		hci_conn_drop(conn);
 	} else
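[Note: hdev->commands[] caches the HCI Read Local Supported Commands bitmask;
HCI_Read_Encryption_Key_Size lives in octet 20, bit 4 of that mask, which is
exactly what the (hdev->commands[20] & 0x10) test above probes before issuing
the command. The probe-then-fallback shape in isolation:

	/* Read_Encryption_Key_Size supported? octet 20, bit 4. If not,
	 * assume the maximum key size instead of asking the controller.
	 */
	if (!(hdev->commands[20] & 0x10))
		conn->enc_key_size = HCI_LINK_KEY_SIZE;	/* 16 bytes */
]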
@@ -4955,7 +5041,8 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		goto not_found;
 	}
 
-	memcpy(cp.ltk, ltk->val, sizeof(ltk->val));
+	memcpy(cp.ltk, ltk->val, ltk->enc_size);
+	memset(cp.ltk + ltk->enc_size, 0, sizeof(cp.ltk) - ltk->enc_size);
 	cp.handle = cpu_to_le16(conn->handle);
 
 	conn->pending_sec_level = smp_ltk_sec_level(ltk);
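[Note: the LTK reply path gets the same treatment as hci_le_start_enc()
above: only ltk->enc_size bytes of ltk->val are meaningful, so the copy is
capped there and the remainder of the fixed 16-byte field is explicitly
zeroed instead of carrying whatever bytes followed the shortened key. The
two-step copy/pad idiom:

	memcpy(cp.ltk, ltk->val, ltk->enc_size);	/* real key bytes */
	memset(cp.ltk + ltk->enc_size, 0,
	       sizeof(cp.ltk) - ltk->enc_size);		/* zero the tail */
]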
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 56f9edbf3d05..f2d30d1156c9 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -503,9 +503,9 @@ static int hci_sock_release(struct socket *sock)
 
 	if (hdev) {
 		if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
-			mgmt_index_added(hdev);
-			hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
 			hci_dev_close(hdev->id);
+			hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
+			mgmt_index_added(hdev);
 		}
 
 		atomic_dec(&hdev->promisc);
@@ -741,10 +741,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			goto done;
 		}
 
-		if (test_bit(HCI_UP, &hdev->flags) ||
-		    test_bit(HCI_INIT, &hdev->flags) ||
+		if (test_bit(HCI_INIT, &hdev->flags) ||
 		    hci_dev_test_flag(hdev, HCI_SETUP) ||
-		    hci_dev_test_flag(hdev, HCI_CONFIG)) {
+		    hci_dev_test_flag(hdev, HCI_CONFIG) ||
+		    (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) &&
+		     test_bit(HCI_UP, &hdev->flags))) {
 			err = -EBUSY;
 			hci_dev_put(hdev);
 			goto done;
@@ -760,10 +761,21 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 
 		err = hci_dev_open(hdev->id);
 		if (err) {
-			hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
-			mgmt_index_added(hdev);
-			hci_dev_put(hdev);
-			goto done;
+			if (err == -EALREADY) {
+				/* In case the transport is already up and
+				 * running, clear the error here.
+				 *
+				 * This can happen when opening an user
+				 * channel and HCI_AUTO_OFF grace period
+				 * is still active.
+				 */
+				err = 0;
+			} else {
+				hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
+				mgmt_index_added(hdev);
+				hci_dev_put(hdev);
+				goto done;
+			}
 		}
 
 		atomic_inc(&hdev->promisc);
@@ -1377,7 +1389,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
1377 1389
1378 sock->ops = &hci_sock_ops; 1390 sock->ops = &hci_sock_ops;
1379 1391
1380 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto); 1392 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern);
1381 if (!sk) 1393 if (!sk)
1382 return -ENOMEM; 1394 return -ENOMEM;
1383 1395
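
The repeated sk_alloc(..., kern) changes throughout this merge track a net-core API change in 4.2: sk_alloc() gained a kern argument so sockets created from kernel context can be told apart from user-created ones (relevant for network-namespace reference counting). A sketch of the calling convention, with the example_* names hypothetical:

    #include <linux/module.h>
    #include <net/sock.h>

    static struct proto example_proto = {
            .name     = "EXAMPLE",
            .owner    = THIS_MODULE,
            .obj_size = sizeof(struct sock),
    };

    /* Sketch only: kern = 1 marks a kernel-internal socket,
     * kern = 0 a socket created on behalf of userspace. */
    static struct sock *example_sock_alloc(struct net *net, gfp_t prio,
                                           int kern)
    {
            return sk_alloc(net, PF_BLUETOOTH, prio, &example_proto, kern);
    }
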
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 9070dfd6b4ad..f1a117f8cad2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
 	session->conn = l2cap_conn_get(conn);
 	session->user.probe = hidp_session_probe;
 	session->user.remove = hidp_session_remove;
+	INIT_LIST_HEAD(&session->user.list);
 	session->ctrl_sock = ctrl_sock;
 	session->intr_sock = intr_sock;
 	skb_queue_head_init(&session->ctrl_transmit);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index cb3fdde1968a..008ba439bd62 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
 	if (sock->type != SOCK_RAW)
 		return -ESOCKTNOSUPPORT;
 
-	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto);
+	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index dad419782a12..45fffa413642 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1601,7 +1601,7 @@ int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user)
 
 	hci_dev_lock(hdev);
 
-	if (user->list.next || user->list.prev) {
+	if (!list_empty(&user->list)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -1631,12 +1631,10 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user)
 
 	hci_dev_lock(hdev);
 
-	if (!user->list.next || !user->list.prev)
+	if (list_empty(&user->list))
 		goto out_unlock;
 
-	list_del(&user->list);
-	user->list.next = NULL;
-	user->list.prev = NULL;
+	list_del_init(&user->list);
 	user->remove(conn, user);
 
 out_unlock:
@@ -1650,9 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn)
 
 	while (!list_empty(&conn->users)) {
 		user = list_first_entry(&conn->users, struct l2cap_user, list);
-		list_del(&user->list);
-		user->list.next = NULL;
-		user->list.prev = NULL;
+		list_del_init(&user->list);
 		user->remove(conn, user);
 	}
 }
@@ -7442,7 +7438,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 	mutex_unlock(&conn->chan_lock);
 }
 
-int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct l2cap_hdr *hdr;
@@ -7485,7 +7481,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 	if (len == skb->len) {
 		/* Complete frame received */
 		l2cap_recv_frame(conn, skb);
-		return 0;
+		return;
 	}
 
 	BT_DBG("Start: total len %d, frag len %d", len, skb->len);
@@ -7544,7 +7540,6 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 
 drop:
 	kfree_skb(skb);
-	return 0;
 }
 
 static struct hci_cb l2cap_cb = {
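
The l2cap_user changes above replace hand-rolled ->next/->prev poking with the standard list idiom: initialize the node with INIT_LIST_HEAD() at creation, remove it with list_del_init(), and test membership with list_empty(). A self-contained sketch of the pattern (example_* names hypothetical):

    #include <linux/list.h>

    struct example_user {
            struct list_head list;
    };

    /* Sketch: with INIT_LIST_HEAD() at creation and list_del_init() on
     * removal, "is this node linked?" is always just list_empty(). */
    static void example_unregister(struct example_user *user)
    {
            if (list_empty(&user->list))    /* not registered */
                    return;

            list_del_init(&user->list);     /* safe to test or remove again */
    }
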
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a7278f05eafb..244287706f91 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = {
 static const struct proto_ops l2cap_sock_ops;
 static void l2cap_sock_init(struct sock *sk, struct sock *parent);
 static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
-				     int proto, gfp_t prio);
+				     int proto, gfp_t prio, int kern);
 
 bool l2cap_is_socket(struct socket *sock)
 {
@@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
 	}
 
 	sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
-			      GFP_ATOMIC);
+			      GFP_ATOMIC, 0);
 	if (!sk) {
 		release_sock(parent);
 		return NULL;
@@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = {
 };
 
 static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
-				     int proto, gfp_t prio)
+				     int proto, gfp_t prio, int kern)
 {
 	struct sock *sk;
 	struct l2cap_chan *chan;
 
-	sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
+	sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern);
 	if (!sk)
 		return NULL;
 
@@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
 
 	sock->ops = &l2cap_sock_ops;
 
	sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
 	if (!sk)
 		return -ENOMEM;
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7fd87e7135b5..7998fb279165 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
 #include "mgmt_util.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	9
+#define MGMT_REVISION	10
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,
@@ -832,6 +832,20 @@ static struct mgmt_pending_cmd *pending_find_data(u16 opcode,
 	return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data);
 }
 
+static u8 get_current_adv_instance(struct hci_dev *hdev)
+{
+	/* The "Set Advertising" setting supersedes the "Add Advertising"
+	 * setting. Here we set the advertising data based on which
+	 * setting was set. When neither apply, default to the global settings,
+	 * represented by instance "0".
+	 */
+	if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
+	    !hci_dev_test_flag(hdev, HCI_ADVERTISING))
+		return hdev->cur_adv_instance;
+
+	return 0x00;
+}
+
 static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
 {
 	u8 ad_len = 0;
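
The helper is moved up and generalized: instead of a hard-coded instance 0x01 it now returns hdev->cur_adv_instance, the bookkeeping field for multi-instance advertising, while instance 0x00 continues to stand for the global "Set Advertising" data. The precedence rule, restated as a standalone sketch (flag booleans stand in for the hci_dev_test_flag() calls):

    #include <linux/types.h>

    /* Sketch of the precedence encoded above: the legacy "Set
     * Advertising" setting wins over "Add Advertising" instances;
     * instance 0 means the global advertising data. */
    static u8 pick_instance(bool instance_flag, bool legacy_flag, u8 cur)
    {
            if (instance_flag && !legacy_flag)
                    return cur;     /* advertise the current instance */

            return 0x00;            /* fall back to global instance 0 */
    }
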
@@ -858,19 +872,25 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
 	return ad_len;
 }
 
-static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
+static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
+					u8 *ptr)
 {
+	struct adv_info *adv_instance;
+
+	adv_instance = hci_find_adv_instance(hdev, instance);
+	if (!adv_instance)
+		return 0;
+
 	/* TODO: Set the appropriate entries based on advertising instance flags
 	 * here once flags other than 0 are supported.
 	 */
-	memcpy(ptr, hdev->adv_instance.scan_rsp_data,
-	       hdev->adv_instance.scan_rsp_len);
+	memcpy(ptr, adv_instance->scan_rsp_data,
+	       adv_instance->scan_rsp_len);
 
-	return hdev->adv_instance.scan_rsp_len;
+	return adv_instance->scan_rsp_len;
 }
 
-static void update_scan_rsp_data_for_instance(struct hci_request *req,
-					      u8 instance)
+static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance)
 {
 	struct hci_dev *hdev = req->hdev;
 	struct hci_cp_le_set_scan_rsp_data cp;
@@ -882,7 +902,7 @@ static void update_scan_rsp_data_for_instance(struct hci_request *req,
 	memset(&cp, 0, sizeof(cp));
 
 	if (instance)
-		len = create_instance_scan_rsp_data(hdev, cp.data);
+		len = create_instance_scan_rsp_data(hdev, instance, cp.data);
 	else
 		len = create_default_scan_rsp_data(hdev, cp.data);
 
@@ -900,21 +920,7 @@ static void update_scan_rsp_data_for_instance(struct hci_request *req,
 
 static void update_scan_rsp_data(struct hci_request *req)
 {
-	struct hci_dev *hdev = req->hdev;
-	u8 instance;
-
-	/* The "Set Advertising" setting supersedes the "Add Advertising"
-	 * setting. Here we set the scan response data based on which
-	 * setting was set. When neither apply, default to the global settings,
-	 * represented by instance "0".
-	 */
-	if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
-	    !hci_dev_test_flag(hdev, HCI_ADVERTISING))
-		instance = 0x01;
-	else
-		instance = 0x00;
-
-	update_scan_rsp_data_for_instance(req, instance);
+	update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev));
 }
 
 static u8 get_adv_discov_flags(struct hci_dev *hdev)
@@ -941,20 +947,6 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev)
 	return 0;
 }
 
-static u8 get_current_adv_instance(struct hci_dev *hdev)
-{
-	/* The "Set Advertising" setting supersedes the "Add Advertising"
-	 * setting. Here we set the advertising data based on which
-	 * setting was set. When neither apply, default to the global
-	 * settings, represented by instance "0".
-	 */
-	if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
-	    !hci_dev_test_flag(hdev, HCI_ADVERTISING))
-		return 0x01;
-
-	return 0x00;
-}
-
 static bool get_connectable(struct hci_dev *hdev)
 {
 	struct mgmt_pending_cmd *cmd;
@@ -975,41 +967,65 @@ static bool get_connectable(struct hci_dev *hdev)
 static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
 {
 	u32 flags;
+	struct adv_info *adv_instance;
 
-	if (instance > 0x01)
-		return 0;
+	if (instance == 0x00) {
+		/* Instance 0 always manages the "Tx Power" and "Flags"
+		 * fields
+		 */
+		flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
 
-	if (instance == 0x01)
-		return hdev->adv_instance.flags;
+		/* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
+		 * corresponds to the "connectable" instance flag.
+		 */
+		if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
+			flags |= MGMT_ADV_FLAG_CONNECTABLE;
 
-	/* Instance 0 always manages the "Tx Power" and "Flags" fields */
-	flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
+		return flags;
+	}
 
-	/* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting corresponds
-	 * to the "connectable" instance flag.
-	 */
-	if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
-		flags |= MGMT_ADV_FLAG_CONNECTABLE;
+	adv_instance = hci_find_adv_instance(hdev, instance);
 
-	return flags;
+	/* Return 0 when we got an invalid instance identifier. */
+	if (!adv_instance)
+		return 0;
+
+	return adv_instance->flags;
 }
 
-static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance)
+static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
 {
-	/* Ignore instance 0 and other unsupported instances */
-	if (instance != 0x01)
+	u8 instance = get_current_adv_instance(hdev);
+	struct adv_info *adv_instance;
+
+	/* Ignore instance 0 */
+	if (instance == 0x00)
+		return 0;
+
+	adv_instance = hci_find_adv_instance(hdev, instance);
+	if (!adv_instance)
 		return 0;
 
 	/* TODO: Take into account the "appearance" and "local-name" flags here.
 	 * These are currently being ignored as they are not supported.
 	 */
-	return hdev->adv_instance.scan_rsp_len;
+	return adv_instance->scan_rsp_len;
 }
 
 static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
 {
+	struct adv_info *adv_instance = NULL;
 	u8 ad_len = 0, flags = 0;
-	u32 instance_flags = get_adv_instance_flags(hdev, instance);
+	u32 instance_flags;
+
+	/* Return 0 when the current instance identifier is invalid. */
+	if (instance) {
+		adv_instance = hci_find_adv_instance(hdev, instance);
+		if (!adv_instance)
+			return 0;
+	}
+
+	instance_flags = get_adv_instance_flags(hdev, instance);
 
 	/* The Add Advertising command allows userspace to set both the general
 	 * and limited discoverable flags.
@@ -1043,12 +1059,11 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
 		}
 	}
 
-	if (instance) {
-		memcpy(ptr, hdev->adv_instance.adv_data,
-		       hdev->adv_instance.adv_data_len);
-
-		ad_len += hdev->adv_instance.adv_data_len;
-		ptr += hdev->adv_instance.adv_data_len;
+	if (adv_instance) {
+		memcpy(ptr, adv_instance->adv_data,
+		       adv_instance->adv_data_len);
+		ad_len += adv_instance->adv_data_len;
+		ptr += adv_instance->adv_data_len;
 	}
 
 	/* Provide Tx Power only if we can provide a valid value for it */
@@ -1065,7 +1080,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
 	return ad_len;
 }
 
-static void update_adv_data_for_instance(struct hci_request *req, u8 instance)
+static void update_inst_adv_data(struct hci_request *req, u8 instance)
 {
 	struct hci_dev *hdev = req->hdev;
 	struct hci_cp_le_set_adv_data cp;
@@ -1093,10 +1108,7 @@ static void update_adv_data_for_instance(struct hci_request *req, u8 instance)
 
 static void update_adv_data(struct hci_request *req)
 {
-	struct hci_dev *hdev = req->hdev;
-	u8 instance = get_current_adv_instance(hdev);
-
-	update_adv_data_for_instance(req, instance);
+	update_inst_adv_data(req, get_current_adv_instance(req->hdev));
 }
 
 int mgmt_update_adv_data(struct hci_dev *hdev)
@@ -1277,7 +1289,7 @@ static void enable_advertising(struct hci_request *req)
 
 	if (connectable)
 		cp.type = LE_ADV_IND;
-	else if (get_adv_instance_scan_rsp_len(hdev, instance))
+	else if (get_cur_adv_instance_scan_rsp_len(hdev))
 		cp.type = LE_ADV_SCAN_IND;
 	else
 		cp.type = LE_ADV_NONCONN_IND;
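
With the helper renamed, enable_advertising() keeps its PDU-type policy: connectable data uses ADV_IND, non-connectable data that carries a scan response uses ADV_SCAN_IND, and everything else ADV_NONCONN_IND. The decision, sketched standalone (the enum values are illustrative, not the HCI constants):

    #include <linux/types.h>

    enum { EX_ADV_IND, EX_ADV_SCAN_IND, EX_ADV_NONCONN_IND };

    /* Sketch of the advertising-type selection used above. */
    static int pick_adv_type(bool connectable, u8 scan_rsp_len)
    {
            if (connectable)
                    return EX_ADV_IND;          /* connectable, scannable */
            if (scan_rsp_len)
                    return EX_ADV_SCAN_IND;     /* scannable only */
            return EX_ADV_NONCONN_IND;          /* broadcast only */
    }
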
@@ -1459,27 +1471,141 @@ static void advertising_removed(struct sock *sk, struct hci_dev *hdev,
 	mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk);
 }
 
-static void clear_adv_instance(struct hci_dev *hdev)
+static int schedule_adv_instance(struct hci_request *req, u8 instance,
+				 bool force) {
+	struct hci_dev *hdev = req->hdev;
+	struct adv_info *adv_instance = NULL;
+	u16 timeout;
+
+	if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+	    !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+		return -EPERM;
+
+	if (hdev->adv_instance_timeout)
+		return -EBUSY;
+
+	adv_instance = hci_find_adv_instance(hdev, instance);
+	if (!adv_instance)
+		return -ENOENT;
+
+	/* A zero timeout means unlimited advertising. As long as there is
+	 * only one instance, duration should be ignored. We still set a timeout
+	 * in case further instances are being added later on.
+	 *
+	 * If the remaining lifetime of the instance is more than the duration
+	 * then the timeout corresponds to the duration, otherwise it will be
+	 * reduced to the remaining instance lifetime.
+	 */
+	if (adv_instance->timeout == 0 ||
+	    adv_instance->duration <= adv_instance->remaining_time)
+		timeout = adv_instance->duration;
+	else
+		timeout = adv_instance->remaining_time;
+
+	/* The remaining time is being reduced unless the instance is being
+	 * advertised without time limit.
+	 */
+	if (adv_instance->timeout)
+		adv_instance->remaining_time =
+				adv_instance->remaining_time - timeout;
+
+	hdev->adv_instance_timeout = timeout;
+	queue_delayed_work(hdev->workqueue,
+			   &hdev->adv_instance_expire,
+			   msecs_to_jiffies(timeout * 1000));
+
+	/* If we're just re-scheduling the same instance again then do not
+	 * execute any HCI commands. This happens when a single instance is
+	 * being advertised.
+	 */
+	if (!force && hdev->cur_adv_instance == instance &&
+	    hci_dev_test_flag(hdev, HCI_LE_ADV))
+		return 0;
+
+	hdev->cur_adv_instance = instance;
+	update_adv_data(req);
+	update_scan_rsp_data(req);
+	enable_advertising(req);
+
+	return 0;
+}
+
+static void cancel_adv_timeout(struct hci_dev *hdev)
 {
-	struct hci_request req;
+	if (hdev->adv_instance_timeout) {
+		hdev->adv_instance_timeout = 0;
+		cancel_delayed_work(&hdev->adv_instance_expire);
+	}
+}
 
-	if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
-		return;
+/* For a single instance:
+ * - force == true: The instance will be removed even when its remaining
+ *   lifetime is not zero.
+ * - force == false: the instance will be deactivated but kept stored unless
+ *   the remaining lifetime is zero.
+ *
+ * For instance == 0x00:
+ * - force == true: All instances will be removed regardless of their timeout
+ *   setting.
+ * - force == false: Only instances that have a timeout will be removed.
+ */
+static void clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
+			       u8 instance, bool force)
+{
+	struct adv_info *adv_instance, *n, *next_instance = NULL;
+	int err;
+	u8 rem_inst;
 
-	if (hdev->adv_instance.timeout)
-		cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+	/* Cancel any timeout concerning the removed instance(s). */
+	if (!instance || hdev->cur_adv_instance == instance)
+		cancel_adv_timeout(hdev);
 
-	memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
-	advertising_removed(NULL, hdev, 1);
-	hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+	/* Get the next instance to advertise BEFORE we remove
+	 * the current one. This can be the same instance again
+	 * if there is only one instance.
+	 */
+	if (instance && hdev->cur_adv_instance == instance)
+		next_instance = hci_get_next_instance(hdev, instance);
 
-	if (!hdev_is_powered(hdev) ||
+	if (instance == 0x00) {
+		list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances,
+					 list) {
+			if (!(force || adv_instance->timeout))
+				continue;
+
+			rem_inst = adv_instance->instance;
+			err = hci_remove_adv_instance(hdev, rem_inst);
+			if (!err)
+				advertising_removed(NULL, hdev, rem_inst);
+		}
+		hdev->cur_adv_instance = 0x00;
+	} else {
+		adv_instance = hci_find_adv_instance(hdev, instance);
+
+		if (force || (adv_instance && adv_instance->timeout &&
+			      !adv_instance->remaining_time)) {
+			/* Don't advertise a removed instance. */
+			if (next_instance &&
+			    next_instance->instance == instance)
+				next_instance = NULL;
+
+			err = hci_remove_adv_instance(hdev, instance);
+			if (!err)
+				advertising_removed(NULL, hdev, instance);
+		}
+	}
+
+	if (list_empty(&hdev->adv_instances)) {
+		hdev->cur_adv_instance = 0x00;
+		hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+	}
+
+	if (!req || !hdev_is_powered(hdev) ||
 	    hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		return;
 
-	hci_req_init(&req, hdev);
-	disable_advertising(&req);
-	hci_req_run(&req, NULL);
+	if (next_instance)
+		schedule_adv_instance(req, next_instance->instance, false);
 }
 
 static int clean_up_hci_state(struct hci_dev *hdev)
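
The scheduling math above distinguishes an instance's total remaining lifetime (timeout/remaining_time, in seconds) from its per-rotation duration; the armed timer is the duration, capped by whatever lifetime is left. The arithmetic, restated as a standalone sketch:

    #include <linux/types.h>

    /* Sketch of the timer arithmetic in schedule_adv_instance():
     * a total lifetime of 0 means "advertise forever"; otherwise the
     * next timer tick is the rotation duration bounded by what the
     * instance has left to live. */
    static u16 next_timeout(u16 lifetime, u16 duration, u16 remaining)
    {
            if (lifetime == 0 || duration <= remaining)
                    return duration;

            return remaining;
    }
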
@@ -1497,8 +1623,7 @@ static int clean_up_hci_state(struct hci_dev *hdev)
 		hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
 	}
 
-	if (hdev->adv_instance.timeout)
-		clear_adv_instance(hdev);
+	clear_adv_instance(hdev, NULL, 0x00, false);
 
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		disable_advertising(&req);
@@ -2453,6 +2578,9 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	val = !!cp->val;
 	enabled = lmp_host_le_capable(hdev);
 
+	if (!val)
+		clear_adv_instance(hdev, NULL, 0x00, true);
+
 	if (!hdev_is_powered(hdev) || val == enabled) {
 		bool changed = false;
 
@@ -4087,6 +4215,7 @@ static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status)
 			return false;
 		}
 
+		cancel_adv_timeout(hdev);
 		disable_advertising(req);
 	}
 
@@ -4669,6 +4798,9 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 {
 	struct cmd_lookup match = { NULL, hdev };
 	struct hci_request req;
+	u8 instance;
+	struct adv_info *adv_instance;
+	int err;
 
 	hci_dev_lock(hdev);
 
@@ -4694,18 +4826,31 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 		sock_put(match.sk);
 
 	/* If "Set Advertising" was just disabled and instance advertising was
-	 * set up earlier, then enable the advertising instance.
+	 * set up earlier, then re-enable multi-instance advertising.
 	 */
 	if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
-	    !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+	    !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) ||
+	    list_empty(&hdev->adv_instances))
 		goto unlock;
 
+	instance = hdev->cur_adv_instance;
+	if (!instance) {
+		adv_instance = list_first_entry_or_null(&hdev->adv_instances,
+							struct adv_info, list);
+		if (!adv_instance)
+			goto unlock;
+
+		instance = adv_instance->instance;
+	}
+
 	hci_req_init(&req, hdev);
 
-	update_adv_data(&req);
-	enable_advertising(&req);
+	err = schedule_adv_instance(&req, instance, true);
+
+	if (!err)
+		err = hci_req_run(&req, enable_advertising_instance);
 
-	if (hci_req_run(&req, enable_advertising_instance) < 0)
+	if (err)
 		BT_ERR("Failed to re-configure advertising");
 
 unlock:
@@ -4790,10 +4935,15 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 	else
 		hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 
+	cancel_adv_timeout(hdev);
+
 	if (val) {
-		/* Switch to instance "0" for the Set Advertising setting. */
-		update_adv_data_for_instance(&req, 0);
-		update_scan_rsp_data_for_instance(&req, 0);
+		/* Switch to instance "0" for the Set Advertising setting.
+		 * We cannot use update_[adv|scan_rsp]_data() here as the
+		 * HCI_ADVERTISING flag is not yet set.
+		 */
+		update_inst_adv_data(&req, 0x00);
+		update_inst_scan_rsp_data(&req, 0x00);
 		enable_advertising(&req);
 	} else {
 		disable_advertising(&req);
@@ -6781,8 +6931,9 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_rp_read_adv_features *rp;
 	size_t rp_len;
-	int err;
+	int err, i;
 	bool instance;
+	struct adv_info *adv_instance;
 	u32 supported_flags;
 
 	BT_DBG("%s", hdev->name);
@@ -6795,12 +6946,9 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
 
 	rp_len = sizeof(*rp);
 
-	/* Currently only one instance is supported, so just add 1 to the
-	 * response length.
-	 */
 	instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE);
 	if (instance)
-		rp_len++;
+		rp_len += hdev->adv_instance_cnt;
 
 	rp = kmalloc(rp_len, GFP_ATOMIC);
 	if (!rp) {
@@ -6813,14 +6961,18 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
 	rp->supported_flags = cpu_to_le32(supported_flags);
 	rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
 	rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
-	rp->max_instances = 1;
+	rp->max_instances = HCI_MAX_ADV_INSTANCES;
 
-	/* Currently only one instance is supported, so simply return the
-	 * current instance number.
-	 */
 	if (instance) {
-		rp->num_instances = 1;
-		rp->instance[0] = 1;
+		i = 0;
+		list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
+			if (i >= hdev->adv_instance_cnt)
+				break;
+
+			rp->instance[i] = adv_instance->instance;
+			i++;
+		}
+		rp->num_instances = hdev->adv_instance_cnt;
 	} else {
 		rp->num_instances = 0;
 	}
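
Read Advertising Features now sizes its reply dynamically: a fixed header plus one byte per stored instance, filled by walking hdev->adv_instances. The same pattern in miniature, as a standalone userspace sketch (example_rp is a hypothetical stand-in for mgmt_rp_read_adv_features):

    #include <stdlib.h>
    #include <string.h>

    struct example_rp {
            unsigned char num_instances;
            unsigned char instance[];   /* flexible array member */
    };

    /* Sketch: allocate header + one byte per advertising instance. */
    static struct example_rp *build_rp(const unsigned char *ids, size_t cnt)
    {
            struct example_rp *rp = malloc(sizeof(*rp) + cnt);

            if (!rp)
                    return NULL;

            rp->num_instances = (unsigned char)cnt;
            memcpy(rp->instance, ids, cnt);
            return rp;
    }
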
@@ -6882,7 +7034,10 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
 				     u16 opcode)
 {
 	struct mgmt_pending_cmd *cmd;
+	struct mgmt_cp_add_advertising *cp;
 	struct mgmt_rp_add_advertising rp;
+	struct adv_info *adv_instance, *n;
+	u8 instance;
 
 	BT_DBG("status %d", status);
 
@@ -6890,16 +7045,32 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
 
 	cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev);
 
-	if (status) {
+	if (status)
 		hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
-		memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
-		advertising_removed(cmd ? cmd->sk : NULL, hdev, 1);
+
+	list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
+		if (!adv_instance->pending)
+			continue;
+
+		if (!status) {
+			adv_instance->pending = false;
+			continue;
+		}
+
+		instance = adv_instance->instance;
+
+		if (hdev->cur_adv_instance == instance)
+			cancel_adv_timeout(hdev);
+
+		hci_remove_adv_instance(hdev, instance);
+		advertising_removed(cmd ? cmd->sk : NULL, hdev, instance);
 	}
 
 	if (!cmd)
 		goto unlock;
 
-	rp.instance = 0x01;
+	cp = cmd->param;
+	rp.instance = cp->instance;
 
 	if (status)
 		mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
@@ -6914,15 +7085,28 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
-static void adv_timeout_expired(struct work_struct *work)
+void mgmt_adv_timeout_expired(struct hci_dev *hdev)
 {
-	struct hci_dev *hdev = container_of(work, struct hci_dev,
-					    adv_instance.timeout_exp.work);
+	u8 instance;
+	struct hci_request req;
+
+	hdev->adv_instance_timeout = 0;
 
-	hdev->adv_instance.timeout = 0;
+	instance = get_current_adv_instance(hdev);
+	if (instance == 0x00)
+		return;
 
 	hci_dev_lock(hdev);
-	clear_adv_instance(hdev);
+	hci_req_init(&req, hdev);
+
+	clear_adv_instance(hdev, &req, instance, false);
+
+	if (list_empty(&hdev->adv_instances))
+		disable_advertising(&req);
+
+	if (!skb_queue_empty(&req.cmd_q))
+		hci_req_run(&req, NULL);
+
 	hci_dev_unlock(hdev);
 }
 
@@ -6934,7 +7118,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 	u32 flags;
 	u32 supported_flags;
 	u8 status;
-	u16 timeout;
+	u16 timeout, duration;
+	unsigned int prev_instance_cnt = hdev->adv_instance_cnt;
+	u8 schedule_instance = 0;
+	struct adv_info *next_instance;
 	int err;
 	struct mgmt_pending_cmd *cmd;
 	struct hci_request req;
@@ -6948,12 +7135,13 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 
 	flags = __le32_to_cpu(cp->flags);
 	timeout = __le16_to_cpu(cp->timeout);
+	duration = __le16_to_cpu(cp->duration);
 
-	/* The current implementation only supports adding one instance and only
-	 * a subset of the specified flags.
+	/* The current implementation only supports a subset of the specified
+	 * flags.
 	 */
 	supported_flags = get_supported_adv_flags(hdev);
-	if (cp->instance != 0x01 || (flags & ~supported_flags))
+	if (flags & ~supported_flags)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
 				       MGMT_STATUS_INVALID_PARAMS);
 
@@ -6981,38 +7169,51 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
-	INIT_DELAYED_WORK(&hdev->adv_instance.timeout_exp, adv_timeout_expired);
-
-	hdev->adv_instance.flags = flags;
-	hdev->adv_instance.adv_data_len = cp->adv_data_len;
-	hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len;
-
-	if (cp->adv_data_len)
-		memcpy(hdev->adv_instance.adv_data, cp->data, cp->adv_data_len);
-
-	if (cp->scan_rsp_len)
-		memcpy(hdev->adv_instance.scan_rsp_data,
-		       cp->data + cp->adv_data_len, cp->scan_rsp_len);
-
-	if (hdev->adv_instance.timeout)
-		cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+	err = hci_add_adv_instance(hdev, cp->instance, flags,
+				   cp->adv_data_len, cp->data,
+				   cp->scan_rsp_len,
+				   cp->data + cp->adv_data_len,
+				   timeout, duration);
+	if (err < 0) {
+		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
+				      MGMT_STATUS_FAILED);
+		goto unlock;
+	}
 
-	hdev->adv_instance.timeout = timeout;
+	/* Only trigger an advertising added event if a new instance was
+	 * actually added.
+	 */
+	if (hdev->adv_instance_cnt > prev_instance_cnt)
+		advertising_added(sk, hdev, cp->instance);
 
-	if (timeout)
-		queue_delayed_work(hdev->workqueue,
-				   &hdev->adv_instance.timeout_exp,
-				   msecs_to_jiffies(timeout * 1000));
+	hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE);
 
-	if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE))
-		advertising_added(sk, hdev, 1);
+	if (hdev->cur_adv_instance == cp->instance) {
+		/* If the currently advertised instance is being changed then
+		 * cancel the current advertising and schedule the next
+		 * instance. If there is only one instance then the overridden
+		 * advertising data will be visible right away.
+		 */
+		cancel_adv_timeout(hdev);
+
+		next_instance = hci_get_next_instance(hdev, cp->instance);
+		if (next_instance)
+			schedule_instance = next_instance->instance;
+	} else if (!hdev->adv_instance_timeout) {
+		/* Immediately advertise the new instance if no other
+		 * instance is currently being advertised.
+		 */
+		schedule_instance = cp->instance;
+	}
 
-	/* If the HCI_ADVERTISING flag is set or the device isn't powered then
-	 * we have no HCI communication to make. Simply return.
+	/* If the HCI_ADVERTISING flag is set or the device isn't powered or
+	 * there is no instance to be advertised then we have no HCI
+	 * communication to make. Simply return.
 	 */
 	if (!hdev_is_powered(hdev) ||
-	    hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
-		rp.instance = 0x01;
+	    hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+	    !schedule_instance) {
+		rp.instance = cp->instance;
 		err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
 					MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
 		goto unlock;
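
After hci_add_adv_instance() stores the new instance, at most one instance is scheduled: if the one currently on air was overwritten, the rotation restarts from its successor; otherwise, if no rotation timer is armed, the new instance goes on air immediately. The decision, restated as a standalone sketch:

    #include <linux/types.h>

    /* Sketch of the scheduling decision in add_advertising(): which
     * instance, if any, should be (re)scheduled next. 0 means none. */
    static u8 pick_schedule(u8 added, u8 cur, u8 next_after_cur,
                            bool timer_armed)
    {
            if (cur == added)
                    return next_after_cur;  /* restart after overwrite */
            if (!timer_armed)
                    return added;           /* nothing on air: go now */
            return 0;                       /* rotation picks it up later */
    }
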
@@ -7030,11 +7231,11 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 
 	hci_req_init(&req, hdev);
 
-	update_adv_data(&req);
-	update_scan_rsp_data(&req);
-	enable_advertising(&req);
+	err = schedule_adv_instance(&req, schedule_instance, true);
+
+	if (!err)
+		err = hci_req_run(&req, add_advertising_complete);
 
-	err = hci_req_run(&req, add_advertising_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
 
@@ -7048,6 +7249,7 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
 					u16 opcode)
 {
 	struct mgmt_pending_cmd *cmd;
+	struct mgmt_cp_remove_advertising *cp;
 	struct mgmt_rp_remove_advertising rp;
 
 	BT_DBG("status %d", status);
@@ -7062,7 +7264,8 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
 	if (!cmd)
 		goto unlock;
 
-	rp.instance = 1;
+	cp = cmd->param;
+	rp.instance = cp->instance;
 
 	mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, MGMT_STATUS_SUCCESS,
 			  &rp, sizeof(rp));
@@ -7077,21 +7280,21 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_cp_remove_advertising *cp = data;
 	struct mgmt_rp_remove_advertising rp;
-	int err;
 	struct mgmt_pending_cmd *cmd;
 	struct hci_request req;
+	int err;
 
 	BT_DBG("%s", hdev->name);
 
-	/* The current implementation only allows modifying instance no 1. A
-	 * value of 0 indicates that all instances should be cleared.
-	 */
-	if (cp->instance > 1)
-		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
-				       MGMT_STATUS_INVALID_PARAMS);
-
 	hci_dev_lock(hdev);
 
+	if (cp->instance && !hci_find_adv_instance(hdev, cp->instance)) {
+		err = mgmt_cmd_status(sk, hdev->id,
+				      MGMT_OP_REMOVE_ADVERTISING,
+				      MGMT_STATUS_INVALID_PARAMS);
+		goto unlock;
+	}
+
 	if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
 	    pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
 	    pending_find(MGMT_OP_SET_LE, hdev)) {
@@ -7106,21 +7309,21 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
-	if (hdev->adv_instance.timeout)
-		cancel_delayed_work(&hdev->adv_instance.timeout_exp);
-
-	memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
+	hci_req_init(&req, hdev);
 
-	advertising_removed(sk, hdev, 1);
+	clear_adv_instance(hdev, &req, cp->instance, true);
 
-	hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+	if (list_empty(&hdev->adv_instances))
+		disable_advertising(&req);
 
-	/* If the HCI_ADVERTISING flag is set or the device isn't powered then
-	 * we have no HCI communication to make. Simply return.
+	/* If no HCI commands have been collected so far or the HCI_ADVERTISING
+	 * flag is set or the device isn't powered then we have no HCI
+	 * communication to make. Simply return.
 	 */
-	if (!hdev_is_powered(hdev) ||
+	if (skb_queue_empty(&req.cmd_q) ||
+	    !hdev_is_powered(hdev) ||
 	    hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
-		rp.instance = 1;
+		rp.instance = cp->instance;
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_REMOVE_ADVERTISING,
 					MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
@@ -7134,9 +7337,6 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
-	hci_req_init(&req, hdev);
-	disable_advertising(&req);
-
 	err = hci_req_run(&req, remove_advertising_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
@@ -7361,6 +7561,7 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 static int powered_update_hci(struct hci_dev *hdev)
 {
 	struct hci_request req;
+	struct adv_info *adv_instance;
 	u8 link_sec;
 
 	hci_req_init(&req, hdev);
@@ -7400,14 +7601,27 @@ static int powered_update_hci(struct hci_dev *hdev)
 		 * advertising data. This also applies to the case
 		 * where BR/EDR was toggled during the AUTO_OFF phase.
 		 */
-		if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
+		if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
+		    (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+		     !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) {
 			update_adv_data(&req);
 			update_scan_rsp_data(&req);
 		}
 
-		if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
-		    hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+		if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
+		    hdev->cur_adv_instance == 0x00 &&
+		    !list_empty(&hdev->adv_instances)) {
+			adv_instance = list_first_entry(&hdev->adv_instances,
+							struct adv_info, list);
+			hdev->cur_adv_instance = adv_instance->instance;
+		}
+
+		if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
 			enable_advertising(&req);
+		else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
+			 hdev->cur_adv_instance)
+			schedule_adv_instance(&req, hdev->cur_adv_instance,
+					      true);
 
 		restart_le_actions(&req);
 	}
@@ -7577,7 +7791,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
 	memset(&ev, 0, sizeof(ev));
 
 	/* Devices using resolvable or non-resolvable random addresses
-	 * without providing an indentity resolving key don't require
+	 * without providing an identity resolving key don't require
 	 * to store long term keys. Their addresses will change the
 	 * next time around.
 	 *
@@ -7603,7 +7817,12 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
 	if (key->type == SMP_LTK)
 		ev.key.master = 1;
 
-	memcpy(ev.key.val, key->val, sizeof(key->val));
+	/* Make sure we copy only the significant bytes based on the
+	 * encryption key size, and set the rest of the value to zeroes.
+	 */
+	memcpy(ev.key.val, key->val, sizeof(key->enc_size));
+	memset(ev.key.val + key->enc_size, 0,
+	       sizeof(ev.key.val) - key->enc_size);
 
 	mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL);
 }
@@ -7617,7 +7836,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk)
 	/* For identity resolving keys from devices that are already
 	 * using a public address or static random address, do not
 	 * ask for storing this key. The identity resolving key really
-	 * is only mandatory for devices using resovlable random
+	 * is only mandatory for devices using resolvable random
 	 * addresses.
 	 *
 	 * Storing all identity resolving keys has the downside that
@@ -7646,7 +7865,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
 	memset(&ev, 0, sizeof(ev));
 
 	/* Devices using resolvable or non-resolvable random addresses
-	 * without providing an indentity resolving key don't require
+	 * without providing an identity resolving key don't require
 	 * to store signature resolving keys. Their addresses will change
 	 * the next time around.
 	 *
@@ -8387,13 +8606,24 @@ static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 void mgmt_reenable_advertising(struct hci_dev *hdev)
 {
 	struct hci_request req;
+	u8 instance;
 
 	if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
 	    !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
 		return;
 
+	instance = get_current_adv_instance(hdev);
+
 	hci_req_init(&req, hdev);
-	enable_advertising(&req);
+
+	if (instance) {
+		schedule_adv_instance(&req, instance, true);
+	} else {
+		update_adv_data(&req);
+		update_scan_rsp_data(&req);
+		enable_advertising(&req);
+	}
+
 	hci_req_run(&req, adv_enable_complete);
 }
 
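
Taken together, the re-enable path reduces to a simple branch: an active instance is force-rescheduled through the new rotation machinery, while instance 0 reprograms the global data before switching advertising back on. A compact restatement of that function's shape (a sketch; the helpers are the mgmt.c statics shown above):

    /* Sketch of mgmt_reenable_advertising() after this merge. */
    static void reenable(struct hci_request *req, u8 instance)
    {
            if (instance) {
                    schedule_adv_instance(req, instance, true);
            } else {
                    update_adv_data(req);
                    update_scan_rsp_data(req);
                    enable_advertising(req);
            }
    }
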
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4fea24275b17..29709fbfd1f5 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock)
 
 	BT_DBG("");
 
-	err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
+	err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
 	if (!err) {
 		struct sock *sk = (*sock)->sk;
 		sk->sk_data_ready = rfcomm_l2data_ready;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 825e8fb5114b..7511df72347f 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -269,12 +269,12 @@ static struct proto rfcomm_proto = {
 	.obj_size	= sizeof(struct rfcomm_pinfo)
 };
 
-static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
 {
 	struct rfcomm_dlc *d;
 	struct sock *sk;
 
-	sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto);
+	sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern);
 	if (!sk)
 		return NULL;
 
@@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock,
 
 	sock->ops = &rfcomm_sock_ops;
 
-	sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+	sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
 	if (!sk)
 		return -ENOMEM;
 
@@ -334,16 +334,19 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock,
 
 static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
-	struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
+	struct sockaddr_rc sa;
 	struct sock *sk = sock->sk;
-	int chan = sa->rc_channel;
-	int err = 0;
-
-	BT_DBG("sk %p %pMR", sk, &sa->rc_bdaddr);
+	int len, err = 0;
 
 	if (!addr || addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
+	memset(&sa, 0, sizeof(sa));
+	len = min_t(unsigned int, sizeof(sa), addr_len);
+	memcpy(&sa, addr, len);
+
+	BT_DBG("sk %p %pMR", sk, &sa.rc_bdaddr);
+
 	lock_sock(sk);
 
 	if (sk->sk_state != BT_OPEN) {
@@ -358,12 +361,13 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
 
 	write_lock(&rfcomm_sk_list.lock);
 
-	if (chan && __rfcomm_get_listen_sock_by_addr(chan, &sa->rc_bdaddr)) {
+	if (sa.rc_channel &&
+	    __rfcomm_get_listen_sock_by_addr(sa.rc_channel, &sa.rc_bdaddr)) {
 		err = -EADDRINUSE;
 	} else {
 		/* Save source address */
-		bacpy(&rfcomm_pi(sk)->src, &sa->rc_bdaddr);
-		rfcomm_pi(sk)->channel = chan;
+		bacpy(&rfcomm_pi(sk)->src, &sa.rc_bdaddr);
+		rfcomm_pi(sk)->channel = sa.rc_channel;
 		sk->sk_state = BT_BOUND;
 	}
 
@@ -969,7 +973,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 		goto done;
 	}
 
-	sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
+	sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0);
 	if (!sk)
 		goto done;
 
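
The rfcomm_sock_bind() rewrite replaces a raw cast of the caller's sockaddr with a zeroed on-stack copy bounded by addr_len, so a short address passed from userspace can no longer be read past its end. The hardening pattern, as a standalone sketch (example_sa is a hypothetical stand-in for sockaddr_rc):

    #include <string.h>

    struct example_sa {
            unsigned short family;
            unsigned char  bdaddr[6];
            unsigned char  channel;
    };

    /* Sketch: copy at most addr_len bytes into a zeroed local struct
     * instead of dereferencing the user-supplied buffer via a cast. */
    static void copy_bind_addr(struct example_sa *out,
                               const void *addr, size_t addr_len)
    {
            size_t len = addr_len < sizeof(*out) ? addr_len : sizeof(*out);

            memset(out, 0, sizeof(*out));
            memcpy(out, addr, len);
    }
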
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 4322c833e748..688a040c5626 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -460,11 +460,11 @@ static struct proto sco_proto = {
 	.obj_size	= sizeof(struct sco_pinfo)
 };
 
-static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
 {
 	struct sock *sk;
 
-	sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto);
+	sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern);
 	if (!sk)
 		return NULL;
 
@@ -501,7 +501,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
 
 	sock->ops = &sco_sock_ops;
 
-	sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+	sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
 	if (!sk)
 		return -ENOMEM;
 
@@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn)
 		bh_lock_sock(parent);
 
 		sk = sco_sock_alloc(sock_net(parent), NULL,
-				    BTPROTO_SCO, GFP_ATOMIC);
+				    BTPROTO_SCO, GFP_ATOMIC, 0);
 		if (!sk) {
 			bh_unlock_sock(parent);
 			sco_conn_unlock(conn);
@@ -1110,7 +1110,7 @@ static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
 	sco_conn_del(hcon, bt_to_errno(reason));
 }
 
-int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
+void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
 {
 	struct sco_conn *conn = hcon->sco_data;
 
@@ -1121,12 +1121,11 @@ int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
 
 	if (skb->len) {
 		sco_recv_frame(conn, skb);
-		return 0;
+		return;
 	}
 
 drop:
 	kfree_skb(skb);
-	return 0;
 }
 
 static struct hci_cb sco_cb = {
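
sco_recv_scodata() and l2cap_recv_acldata() both become void because their single caller, the HCI RX path, never inspected the return value; the conversion simply drops dead "return 0;" statements. The resulting hook shape, as a standalone sketch:

    #include <linux/skbuff.h>

    /* Sketch of the cleaned-up receive hook: a status return that no
     * caller consumes is dropped in favour of void. */
    static void example_recv(struct sk_buff *skb)
    {
            if (!skb->len)
                    goto drop;

            /* ... deliver the complete frame to the upper layer ... */
            return;

    drop:
            kfree_skb(skb);
    }
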
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 1ab3dc9c8f99..3d0f7d2a0616 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -33,6 +33,9 @@
33#include "ecc.h" 33#include "ecc.h"
34#include "smp.h" 34#include "smp.h"
35 35
36#define SMP_DEV(hdev) \
37 ((struct smp_dev *)((struct l2cap_chan *)((hdev)->smp_data))->data)
38
36/* Low-level debug macros to be used for stuff that we don't want 39/* Low-level debug macros to be used for stuff that we don't want
37 * accidentially in dmesg, i.e. the values of the various crypto keys 40 * accidentially in dmesg, i.e. the values of the various crypto keys
38 * and the inputs & outputs of crypto functions. 41 * and the inputs & outputs of crypto functions.
@@ -81,6 +84,9 @@ struct smp_dev {
81 u8 local_rand[16]; 84 u8 local_rand[16];
82 bool debug_key; 85 bool debug_key;
83 86
87 u8 min_key_size;
88 u8 max_key_size;
89
84 struct crypto_blkcipher *tfm_aes; 90 struct crypto_blkcipher *tfm_aes;
85 struct crypto_hash *tfm_cmac; 91 struct crypto_hash *tfm_cmac;
86}; 92};
@@ -371,6 +377,8 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
371 uint8_t tmp[16], data[16]; 377 uint8_t tmp[16], data[16];
372 int err; 378 int err;
373 379
380 SMP_DBG("k %16phN r %16phN", k, r);
381
374 if (!tfm) { 382 if (!tfm) {
375 BT_ERR("tfm %p", tfm); 383 BT_ERR("tfm %p", tfm);
376 return -EINVAL; 384 return -EINVAL;
@@ -400,6 +408,8 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
400 /* Most significant octet of encryptedData corresponds to data[0] */ 408 /* Most significant octet of encryptedData corresponds to data[0] */
401 swap_buf(data, r, 16); 409 swap_buf(data, r, 16);
402 410
411 SMP_DBG("r %16phN", r);
412
403 return err; 413 return err;
404} 414}
405 415
@@ -410,6 +420,10 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
410 u8 p1[16], p2[16]; 420 u8 p1[16], p2[16];
411 int err; 421 int err;
412 422
423 SMP_DBG("k %16phN r %16phN", k, r);
424 SMP_DBG("iat %u ia %6phN rat %u ra %6phN", _iat, ia, _rat, ra);
425 SMP_DBG("preq %7phN pres %7phN", preq, pres);
426
413 memset(p1, 0, 16); 427 memset(p1, 0, 16);
414 428
415 /* p1 = pres || preq || _rat || _iat */ 429 /* p1 = pres || preq || _rat || _iat */
@@ -418,10 +432,7 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
418 memcpy(p1 + 2, preq, 7); 432 memcpy(p1 + 2, preq, 7);
419 memcpy(p1 + 9, pres, 7); 433 memcpy(p1 + 9, pres, 7);
420 434
421 /* p2 = padding || ia || ra */ 435 SMP_DBG("p1 %16phN", p1);
422 memcpy(p2, ra, 6);
423 memcpy(p2 + 6, ia, 6);
424 memset(p2 + 12, 0, 4);
425 436
426 /* res = r XOR p1 */ 437 /* res = r XOR p1 */
427 u128_xor((u128 *) res, (u128 *) r, (u128 *) p1); 438 u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
@@ -433,6 +444,13 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
433 return err; 444 return err;
434 } 445 }
435 446
447 /* p2 = padding || ia || ra */
448 memcpy(p2, ra, 6);
449 memcpy(p2 + 6, ia, 6);
450 memset(p2 + 12, 0, 4);
451
452 SMP_DBG("p2 %16phN", p2);
453
436 /* res = res XOR p2 */ 454 /* res = res XOR p2 */
437 u128_xor((u128 *) res, (u128 *) res, (u128 *) p2); 455 u128_xor((u128 *) res, (u128 *) res, (u128 *) p2);
438 456
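The smp_c1() hunks are code motion rather than a behavioural change: p2 is now built only after the first encryption succeeds, next to the XOR that consumes it, and SMP_DBG traces each intermediate. The confirm value is still c1 = e(k, e(k, r XOR p1) XOR p2). A self-contained sketch of that data flow; the XOR stub stands in for AES-128 purely so the steps are visible, and the p1/p2 contents are faked:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for smp_e()'s AES-128 single-block encryption (NOT AES;
 * illustration only). */
static void e_stub(const uint8_t k[16], uint8_t blk[16])
{
	for (int i = 0; i < 16; i++)
		blk[i] ^= k[i];
}

static void xor16(uint8_t *dst, const uint8_t *a, const uint8_t *b)
{
	for (int i = 0; i < 16; i++)
		dst[i] = a[i] ^ b[i];
}

int main(void)
{
	uint8_t k[16] = { 0 }, r[16] = { 0 }, p1[16], p2[16], res[16];

	/* p1 = pres || preq || _rat || _iat, p2 = padding || ia || ra
	 * (layouts per the hunk above; values faked here). */
	memset(p1, 0x11, sizeof(p1));
	memset(p2, 0x22, sizeof(p2));

	xor16(res, r, p1);	/* res = r XOR p1 */
	e_stub(k, res);		/* res = e(k, res) */
	xor16(res, res, p2);	/* res = res XOR p2 */
	e_stub(k, res);		/* res = e(k, res), the confirm value */

	for (int i = 0; i < 16; i++)
		printf("%02x", res[i]);
	putchar('\n');
	return 0;
}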
@@ -696,7 +714,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
696 if (rsp == NULL) { 714 if (rsp == NULL) {
697 req->io_capability = conn->hcon->io_capability; 715 req->io_capability = conn->hcon->io_capability;
698 req->oob_flag = oob_flag; 716 req->oob_flag = oob_flag;
699 req->max_key_size = SMP_MAX_ENC_KEY_SIZE; 717 req->max_key_size = SMP_DEV(hdev)->max_key_size;
700 req->init_key_dist = local_dist; 718 req->init_key_dist = local_dist;
701 req->resp_key_dist = remote_dist; 719 req->resp_key_dist = remote_dist;
702 req->auth_req = (authreq & AUTH_REQ_MASK(hdev)); 720 req->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -707,7 +725,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
707 725
708 rsp->io_capability = conn->hcon->io_capability; 726 rsp->io_capability = conn->hcon->io_capability;
709 rsp->oob_flag = oob_flag; 727 rsp->oob_flag = oob_flag;
710 rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE; 728 rsp->max_key_size = SMP_DEV(hdev)->max_key_size;
711 rsp->init_key_dist = req->init_key_dist & remote_dist; 729 rsp->init_key_dist = req->init_key_dist & remote_dist;
712 rsp->resp_key_dist = req->resp_key_dist & local_dist; 730 rsp->resp_key_dist = req->resp_key_dist & local_dist;
713 rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev)); 731 rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -718,10 +736,11 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
718static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) 736static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
719{ 737{
720 struct l2cap_chan *chan = conn->smp; 738 struct l2cap_chan *chan = conn->smp;
739 struct hci_dev *hdev = conn->hcon->hdev;
721 struct smp_chan *smp = chan->data; 740 struct smp_chan *smp = chan->data;
722 741
723 if ((max_key_size > SMP_MAX_ENC_KEY_SIZE) || 742 if (max_key_size > SMP_DEV(hdev)->max_key_size ||
724 (max_key_size < SMP_MIN_ENC_KEY_SIZE)) 743 max_key_size < SMP_MIN_ENC_KEY_SIZE)
725 return SMP_ENC_KEY_SIZE; 744 return SMP_ENC_KEY_SIZE;
726 745
727 smp->enc_key_size = max_key_size; 746 smp->enc_key_size = max_key_size;
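check_enc_key_size() now validates the peer's proposal against the per-adapter max_key_size instead of the compile-time SMP_MAX_ENC_KEY_SIZE. The check reduces to a range test, roughly (SMP_MIN_ENC_KEY_SIZE is 7 in the kernel headers):

#include <stdio.h>

#define SMP_MIN_ENC_KEY_SIZE 7

/* Sketch of the new check: lower bound fixed by the spec, upper bound
 * taken from the adapter's tunable max_key_size. */
static int key_size_ok(unsigned int size, unsigned int dev_max)
{
	return size >= SMP_MIN_ENC_KEY_SIZE && size <= dev_max;
}

int main(void)
{
	printf("%d\n", key_size_ok(16, 16));	/* 1: accepted */
	printf("%d\n", key_size_ok(16, 10));	/* 0: above per-device max */
	printf("%d\n", key_size_ok(6, 16));	/* 0: below spec minimum */
	return 0;
}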
@@ -985,13 +1004,10 @@ static u8 smp_random(struct smp_chan *smp)
985 1004
986 smp_s1(smp->tfm_aes, smp->tk, smp->rrnd, smp->prnd, stk); 1005 smp_s1(smp->tfm_aes, smp->tk, smp->rrnd, smp->prnd, stk);
987 1006
988 memset(stk + smp->enc_key_size, 0,
989 SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
990
991 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) 1007 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
992 return SMP_UNSPECIFIED; 1008 return SMP_UNSPECIFIED;
993 1009
994 hci_le_start_enc(hcon, ediv, rand, stk); 1010 hci_le_start_enc(hcon, ediv, rand, stk, smp->enc_key_size);
995 hcon->enc_key_size = smp->enc_key_size; 1011 hcon->enc_key_size = smp->enc_key_size;
996 set_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags); 1012 set_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags);
997 } else { 1013 } else {
@@ -1004,9 +1020,6 @@ static u8 smp_random(struct smp_chan *smp)
1004 1020
1005 smp_s1(smp->tfm_aes, smp->tk, smp->prnd, smp->rrnd, stk); 1021 smp_s1(smp->tfm_aes, smp->tk, smp->prnd, smp->rrnd, stk);
1006 1022
1007 memset(stk + smp->enc_key_size, 0,
1008 SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
1009
1010 if (hcon->pending_sec_level == BT_SECURITY_HIGH) 1023 if (hcon->pending_sec_level == BT_SECURITY_HIGH)
1011 auth = 1; 1024 auth = 1;
1012 else 1025 else
@@ -1144,9 +1157,6 @@ static void sc_add_ltk(struct smp_chan *smp)
1144 else 1157 else
1145 auth = 0; 1158 auth = 0;
1146 1159
1147 memset(smp->tk + smp->enc_key_size, 0,
1148 SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
1149
1150 smp->ltk = hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, 1160 smp->ltk = hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
1151 key_type, auth, smp->tk, smp->enc_key_size, 1161 key_type, auth, smp->tk, smp->enc_key_size,
1152 0, 0); 1162 0, 0);
@@ -1268,7 +1278,14 @@ static void smp_distribute_keys(struct smp_chan *smp)
1268 __le16 ediv; 1278 __le16 ediv;
1269 __le64 rand; 1279 __le64 rand;
1270 1280
 1271 get_random_bytes(enc.ltk, sizeof(enc.ltk)); 1281 /* Make sure we generate only the significant number of
1282 * bytes based on the encryption key size, and set the rest
1283 * of the value to zeroes.
1284 */
1285 get_random_bytes(enc.ltk, smp->enc_key_size);
1286 memset(enc.ltk + smp->enc_key_size, 0,
1287 sizeof(enc.ltk) - smp->enc_key_size);
1288
1272 get_random_bytes(&ediv, sizeof(ediv)); 1289 get_random_bytes(&ediv, sizeof(ediv));
1273 get_random_bytes(&rand, sizeof(rand)); 1290 get_random_bytes(&rand, sizeof(rand));
1274 1291
@@ -1688,7 +1705,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
1688 1705
1689 req->init_key_dist = local_dist; 1706 req->init_key_dist = local_dist;
1690 req->resp_key_dist = remote_dist; 1707 req->resp_key_dist = remote_dist;
1691 req->max_key_size = SMP_MAX_ENC_KEY_SIZE; 1708 req->max_key_size = conn->hcon->enc_key_size;
1692 1709
1693 smp->remote_key_dist = remote_dist; 1710 smp->remote_key_dist = remote_dist;
1694 1711
@@ -1697,7 +1714,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
1697 1714
1698 memset(rsp, 0, sizeof(*rsp)); 1715 memset(rsp, 0, sizeof(*rsp));
1699 1716
1700 rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE; 1717 rsp->max_key_size = conn->hcon->enc_key_size;
1701 rsp->init_key_dist = req->init_key_dist & remote_dist; 1718 rsp->init_key_dist = req->init_key_dist & remote_dist;
1702 rsp->resp_key_dist = req->resp_key_dist & local_dist; 1719 rsp->resp_key_dist = req->resp_key_dist & local_dist;
1703 1720
@@ -2190,7 +2207,7 @@ static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
2190 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) 2207 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
2191 return true; 2208 return true;
2192 2209
2193 hci_le_start_enc(hcon, key->ediv, key->rand, key->val); 2210 hci_le_start_enc(hcon, key->ediv, key->rand, key->val, key->enc_size);
2194 hcon->enc_key_size = key->enc_size; 2211 hcon->enc_key_size = key->enc_size;
2195 2212
2196 /* We never store STKs for master role, so clear this flag */ 2213 /* We never store STKs for master role, so clear this flag */
@@ -2738,7 +2755,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
2738 sc_add_ltk(smp); 2755 sc_add_ltk(smp);
2739 2756
2740 if (hcon->out) { 2757 if (hcon->out) {
2741 hci_le_start_enc(hcon, 0, 0, smp->tk); 2758 hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size);
2742 hcon->enc_key_size = smp->enc_key_size; 2759 hcon->enc_key_size = smp->enc_key_size;
2743 } 2760 }
2744 2761
@@ -3120,6 +3137,8 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3120 3137
3121 smp->tfm_aes = tfm_aes; 3138 smp->tfm_aes = tfm_aes;
3122 smp->tfm_cmac = tfm_cmac; 3139 smp->tfm_cmac = tfm_cmac;
3140 smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
3141 smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
3123 3142
3124create_chan: 3143create_chan:
3125 chan = l2cap_chan_create(); 3144 chan = l2cap_chan_create();
@@ -3242,6 +3261,94 @@ static const struct file_operations force_bredr_smp_fops = {
3242 .llseek = default_llseek, 3261 .llseek = default_llseek,
3243}; 3262};
3244 3263
3264static ssize_t le_min_key_size_read(struct file *file,
3265 char __user *user_buf,
3266 size_t count, loff_t *ppos)
3267{
3268 struct hci_dev *hdev = file->private_data;
3269 char buf[4];
3270
3271 snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->min_key_size);
3272
3273 return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
3274}
3275
3276static ssize_t le_min_key_size_write(struct file *file,
3277 const char __user *user_buf,
3278 size_t count, loff_t *ppos)
3279{
3280 struct hci_dev *hdev = file->private_data;
3281 char buf[32];
3282 size_t buf_size = min(count, (sizeof(buf) - 1));
3283 u8 key_size;
3284
3285 if (copy_from_user(buf, user_buf, buf_size))
3286 return -EFAULT;
3287
3288 buf[buf_size] = '\0';
3289
3290 sscanf(buf, "%hhu", &key_size);
3291
3292 if (key_size > SMP_DEV(hdev)->max_key_size ||
3293 key_size < SMP_MIN_ENC_KEY_SIZE)
3294 return -EINVAL;
3295
3296 SMP_DEV(hdev)->min_key_size = key_size;
3297
3298 return count;
3299}
3300
3301static const struct file_operations le_min_key_size_fops = {
3302 .open = simple_open,
3303 .read = le_min_key_size_read,
3304 .write = le_min_key_size_write,
3305 .llseek = default_llseek,
3306};
3307
3308static ssize_t le_max_key_size_read(struct file *file,
3309 char __user *user_buf,
3310 size_t count, loff_t *ppos)
3311{
3312 struct hci_dev *hdev = file->private_data;
3313 char buf[4];
3314
3315 snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->max_key_size);
3316
3317 return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
3318}
3319
3320static ssize_t le_max_key_size_write(struct file *file,
3321 const char __user *user_buf,
3322 size_t count, loff_t *ppos)
3323{
3324 struct hci_dev *hdev = file->private_data;
3325 char buf[32];
3326 size_t buf_size = min(count, (sizeof(buf) - 1));
3327 u8 key_size;
3328
3329 if (copy_from_user(buf, user_buf, buf_size))
3330 return -EFAULT;
3331
3332 buf[buf_size] = '\0';
3333
3334 sscanf(buf, "%hhu", &key_size);
3335
3336 if (key_size > SMP_MAX_ENC_KEY_SIZE ||
3337 key_size < SMP_DEV(hdev)->min_key_size)
3338 return -EINVAL;
3339
3340 SMP_DEV(hdev)->max_key_size = key_size;
3341
3342 return count;
3343}
3344
3345static const struct file_operations le_max_key_size_fops = {
3346 .open = simple_open,
3347 .read = le_max_key_size_read,
3348 .write = le_max_key_size_write,
3349 .llseek = default_llseek,
3350};
3351
3245int smp_register(struct hci_dev *hdev) 3352int smp_register(struct hci_dev *hdev)
3246{ 3353{
3247 struct l2cap_chan *chan; 3354 struct l2cap_chan *chan;
@@ -3266,6 +3373,11 @@ int smp_register(struct hci_dev *hdev)
3266 3373
3267 hdev->smp_data = chan; 3374 hdev->smp_data = chan;
3268 3375
3376 debugfs_create_file("le_min_key_size", 0644, hdev->debugfs, hdev,
3377 &le_min_key_size_fops);
3378 debugfs_create_file("le_max_key_size", 0644, hdev->debugfs, hdev,
3379 &le_max_key_size_fops);
3380
3269 /* If the controller does not support BR/EDR Secure Connections 3381 /* If the controller does not support BR/EDR Secure Connections
3270 * feature, then the BR/EDR SMP channel shall not be present. 3382 * feature, then the BR/EDR SMP channel shall not be present.
3271 * 3383 *
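The two new debugfs files expose the LE minimum and maximum encryption key sizes per adapter; a write outside the valid range (bounded by the spec limits and the twin file's current value) returns -EINVAL. Note the handlers do not check sscanf()'s return, so non-numeric input leaves key_size uninitialized before the range test. A userspace sketch of driving the knob, assuming debugfs is mounted at /sys/kernel/debug and the adapter is hci0:

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/bluetooth/hci0/le_max_key_size";
	char buf[8];
	ssize_t n;
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "16", 2) != 2)	/* out-of-range values get -EINVAL */
		perror("write");
	n = pread(fd, buf, sizeof(buf) - 1, 0);
	if (n > 0) {
		buf[n] = '\0';
		printf("le_max_key_size: %s", buf);
	}
	close(fd);
	return 0;
}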
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index fd7ee03c59b3..a1cda5d4718d 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -12,6 +12,8 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
12 12
13bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o 13bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o
14 14
15br_netfilter-y := br_netfilter_hooks.o
16br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o
15obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o 17obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
16 18
17bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o 19bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 02c24cf63c34..a1abe4936fe1 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,13 +121,13 @@ static struct notifier_block br_device_notifier = {
121 .notifier_call = br_device_event 121 .notifier_call = br_device_event
122}; 122};
123 123
124static int br_netdev_switch_event(struct notifier_block *unused, 124static int br_switchdev_event(struct notifier_block *unused,
125 unsigned long event, void *ptr) 125 unsigned long event, void *ptr)
126{ 126{
127 struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); 127 struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
128 struct net_bridge_port *p; 128 struct net_bridge_port *p;
129 struct net_bridge *br; 129 struct net_bridge *br;
130 struct netdev_switch_notifier_fdb_info *fdb_info; 130 struct switchdev_notifier_fdb_info *fdb_info;
131 int err = NOTIFY_DONE; 131 int err = NOTIFY_DONE;
132 132
133 rtnl_lock(); 133 rtnl_lock();
@@ -138,14 +138,14 @@ static int br_netdev_switch_event(struct notifier_block *unused,
138 br = p->br; 138 br = p->br;
139 139
140 switch (event) { 140 switch (event) {
141 case NETDEV_SWITCH_FDB_ADD: 141 case SWITCHDEV_FDB_ADD:
142 fdb_info = ptr; 142 fdb_info = ptr;
143 err = br_fdb_external_learn_add(br, p, fdb_info->addr, 143 err = br_fdb_external_learn_add(br, p, fdb_info->addr,
144 fdb_info->vid); 144 fdb_info->vid);
145 if (err) 145 if (err)
146 err = notifier_from_errno(err); 146 err = notifier_from_errno(err);
147 break; 147 break;
148 case NETDEV_SWITCH_FDB_DEL: 148 case SWITCHDEV_FDB_DEL:
149 fdb_info = ptr; 149 fdb_info = ptr;
150 err = br_fdb_external_learn_del(br, p, fdb_info->addr, 150 err = br_fdb_external_learn_del(br, p, fdb_info->addr,
151 fdb_info->vid); 151 fdb_info->vid);
@@ -159,8 +159,8 @@ out:
159 return err; 159 return err;
160} 160}
161 161
162static struct notifier_block br_netdev_switch_notifier = { 162static struct notifier_block br_switchdev_notifier = {
163 .notifier_call = br_netdev_switch_event, 163 .notifier_call = br_switchdev_event,
164}; 164};
165 165
166static void __net_exit br_net_exit(struct net *net) 166static void __net_exit br_net_exit(struct net *net)
@@ -214,7 +214,7 @@ static int __init br_init(void)
214 if (err) 214 if (err)
215 goto err_out3; 215 goto err_out3;
216 216
217 err = register_netdev_switch_notifier(&br_netdev_switch_notifier); 217 err = register_switchdev_notifier(&br_switchdev_notifier);
218 if (err) 218 if (err)
219 goto err_out4; 219 goto err_out4;
220 220
@@ -235,7 +235,7 @@ static int __init br_init(void)
235 return 0; 235 return 0;
236 236
237err_out5: 237err_out5:
238 unregister_netdev_switch_notifier(&br_netdev_switch_notifier); 238 unregister_switchdev_notifier(&br_switchdev_notifier);
239err_out4: 239err_out4:
240 unregister_netdevice_notifier(&br_device_notifier); 240 unregister_netdevice_notifier(&br_device_notifier);
241err_out3: 241err_out3:
@@ -253,7 +253,7 @@ static void __exit br_deinit(void)
253{ 253{
254 stp_proto_unregister(&br_stp_proto); 254 stp_proto_unregister(&br_stp_proto);
255 br_netlink_fini(); 255 br_netlink_fini();
256 unregister_netdev_switch_notifier(&br_netdev_switch_notifier); 256 unregister_switchdev_notifier(&br_switchdev_notifier);
257 unregister_netdevice_notifier(&br_device_notifier); 257 unregister_netdevice_notifier(&br_device_notifier);
258 brioctl_set(NULL); 258 brioctl_set(NULL);
259 unregister_pernet_subsys(&br_net_ops); 259 unregister_pernet_subsys(&br_net_ops);
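The br.c changes are a pure rename from the netdev_switch_* prefix to switchdev_*; registration and dispatch still follow the kernel's notifier-chain pattern. A freestanding sketch of that pattern, with illustrative names rather than the kernel API:

#include <stdio.h>

#define FDB_ADD 1	/* stands in for SWITCHDEV_FDB_ADD */
#define FDB_DEL 2	/* stands in for SWITCHDEV_FDB_DEL */

struct notifier {
	int (*call)(struct notifier *nb, unsigned long event, void *ptr);
	struct notifier *next;
};

static struct notifier *chain;

static void register_notifier(struct notifier *nb)
{
	nb->next = chain;
	chain = nb;
}

static void call_chain(unsigned long event, void *ptr)
{
	for (struct notifier *nb = chain; nb; nb = nb->next)
		nb->call(nb, event, ptr);
}

static int fdb_event(struct notifier *nb, unsigned long event, void *ptr)
{
	(void)nb;
	printf("%s %s\n", event == FDB_ADD ? "add" : "del", (char *)ptr);
	return 0;
}

int main(void)
{
	struct notifier nb = { .call = fdb_event };

	register_notifier(&nb);
	call_chain(FDB_ADD, "52:54:00:12:34:56");
	call_chain(FDB_DEL, "52:54:00:12:34:56");
	return 0;
}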
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 659fb96672e4..9e9875da0a4f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -24,6 +24,7 @@
24#include <linux/atomic.h> 24#include <linux/atomic.h>
25#include <asm/unaligned.h> 25#include <asm/unaligned.h>
26#include <linux/if_vlan.h> 26#include <linux/if_vlan.h>
27#include <net/switchdev.h>
27#include "br_private.h" 28#include "br_private.h"
28 29
29static struct kmem_cache *br_fdb_cache __read_mostly; 30static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -130,11 +131,27 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
130 } 131 }
131} 132}
132 133
134static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
135{
136 struct switchdev_obj obj = {
137 .id = SWITCHDEV_OBJ_PORT_FDB,
138 .u.fdb = {
139 .addr = f->addr.addr,
140 .vid = f->vlan_id,
141 },
142 };
143
144 switchdev_port_obj_del(f->dst->dev, &obj);
145}
146
133static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) 147static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
134{ 148{
135 if (f->is_static) 149 if (f->is_static)
136 fdb_del_hw_addr(br, f->addr.addr); 150 fdb_del_hw_addr(br, f->addr.addr);
137 151
152 if (f->added_by_external_learn)
153 fdb_del_external_learn(f);
154
138 hlist_del_rcu(&f->hlist); 155 hlist_del_rcu(&f->hlist);
139 fdb_notify(br, f, RTM_DELNEIGH); 156 fdb_notify(br, f, RTM_DELNEIGH);
140 call_rcu(&f->rcu, fdb_rcu_free); 157 call_rcu(&f->rcu, fdb_rcu_free);
@@ -313,9 +330,11 @@ void br_fdb_flush(struct net_bridge *br)
313 330
314/* Flush all entries referring to a specific port. 331/* Flush all entries referring to a specific port.
315 * if do_all is set also flush static entries 332 * if do_all is set also flush static entries
333 * if vid is set delete all entries that match the vlan_id
316 */ 334 */
317void br_fdb_delete_by_port(struct net_bridge *br, 335void br_fdb_delete_by_port(struct net_bridge *br,
318 const struct net_bridge_port *p, 336 const struct net_bridge_port *p,
337 u16 vid,
319 int do_all) 338 int do_all)
320{ 339{
321 int i; 340 int i;
@@ -330,8 +349,9 @@ void br_fdb_delete_by_port(struct net_bridge *br,
330 if (f->dst != p) 349 if (f->dst != p)
331 continue; 350 continue;
332 351
333 if (f->is_static && !do_all) 352 if (!do_all)
334 continue; 353 if (f->is_static || (vid && f->vlan_id != vid))
354 continue;
335 355
336 if (f->is_local) 356 if (f->is_local)
337 fdb_delete_local(br, p, f); 357 fdb_delete_local(br, p, f);
@@ -736,6 +756,12 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
736 struct net_bridge_fdb_entry *fdb; 756 struct net_bridge_fdb_entry *fdb;
737 bool modified = false; 757 bool modified = false;
738 758
 759 /* If the port cannot learn, allow only local and static entries */
760 if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
761 !(source->state == BR_STATE_LEARNING ||
762 source->state == BR_STATE_FORWARDING))
763 return -EPERM;
764
739 fdb = fdb_find(head, addr, vid); 765 fdb = fdb_find(head, addr, vid);
740 if (fdb == NULL) { 766 if (fdb == NULL) {
741 if (!(flags & NLM_F_CREATE)) 767 if (!(flags & NLM_F_CREATE))
@@ -867,13 +893,15 @@ out:
867 return err; 893 return err;
868} 894}
869 895
870static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan) 896static int fdb_delete_by_addr_and_port(struct net_bridge_port *p,
897 const u8 *addr, u16 vlan)
871{ 898{
899 struct net_bridge *br = p->br;
872 struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; 900 struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
873 struct net_bridge_fdb_entry *fdb; 901 struct net_bridge_fdb_entry *fdb;
874 902
875 fdb = fdb_find(head, addr, vlan); 903 fdb = fdb_find(head, addr, vlan);
876 if (!fdb) 904 if (!fdb || fdb->dst != p)
877 return -ENOENT; 905 return -ENOENT;
878 906
879 fdb_delete(br, fdb); 907 fdb_delete(br, fdb);
@@ -886,7 +914,7 @@ static int __br_fdb_delete(struct net_bridge_port *p,
886 int err; 914 int err;
887 915
888 spin_lock_bh(&p->br->hash_lock); 916 spin_lock_bh(&p->br->hash_lock);
889 err = fdb_delete_by_addr(p->br, addr, vid); 917 err = fdb_delete_by_addr_and_port(p, addr, vid);
890 spin_unlock_bh(&p->br->hash_lock); 918 spin_unlock_bh(&p->br->hash_lock);
891 919
892 return err; 920 return err;
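fdb_delete_by_addr_and_port() closes a hole where a delete request via one port could remove an entry owned by another: the lookup now also requires fdb->dst == p. The shape of that check in a freestanding sketch:

#include <stdio.h>
#include <string.h>

struct port { const char *name; };

struct fdb_entry {
	unsigned char addr[6];
	unsigned short vid;
	struct port *dst;	/* owning port */
};

/* Delete only if (addr, vid) match AND the entry belongs to the
 * requesting port, as in the hunk above. */
static int delete_entry(struct fdb_entry *f, struct port *p,
			const unsigned char *addr, unsigned short vid)
{
	if (memcmp(f->addr, addr, 6) || f->vid != vid || f->dst != p)
		return -2;	/* -ENOENT in the kernel */
	printf("deleted entry on %s\n", f->dst->name);
	return 0;
}

int main(void)
{
	struct port p1 = { "eth0" }, p2 = { "eth1" };
	struct fdb_entry f = { { 0x52, 0x54, 0, 0x12, 0x34, 0x56 }, 1, &p1 };

	printf("%d\n", delete_entry(&f, &p2, f.addr, 1));	/* wrong port */
	printf("%d\n", delete_entry(&f, &p1, f.addr, 1));	/* owner */
	return 0;
}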
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e97572b5d2cc..0ff6e1bbca91 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
42 } else { 42 } else {
43 skb_push(skb, ETH_HLEN); 43 skb_push(skb, ETH_HLEN);
44 br_drop_fake_rtable(skb); 44 br_drop_fake_rtable(skb);
45 skb_sender_cpu_clear(skb);
45 dev_queue_xmit(skb); 46 dev_queue_xmit(skb);
46 } 47 }
47 48
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 1849d96b3c91..a538cb1199a3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -249,7 +249,7 @@ static void del_nbp(struct net_bridge_port *p)
249 list_del_rcu(&p->list); 249 list_del_rcu(&p->list);
250 250
251 nbp_vlan_flush(p); 251 nbp_vlan_flush(p);
252 br_fdb_delete_by_port(br, p, 1); 252 br_fdb_delete_by_port(br, p, 0, 1);
253 nbp_update_port_count(br); 253 nbp_update_port_count(br);
254 254
255 netdev_upper_dev_unlink(dev, br->dev); 255 netdev_upper_dev_unlink(dev, br->dev);
@@ -278,7 +278,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
278 del_nbp(p); 278 del_nbp(p);
279 } 279 }
280 280
281 br_fdb_delete_by_port(br, NULL, 1); 281 br_fdb_delete_by_port(br, NULL, 0, 1);
282 282
283 br_vlan_flush(br); 283 br_vlan_flush(br);
284 del_timer_sync(&br->gc_timer); 284 del_timer_sync(&br->gc_timer);
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index a9a4a1b7863d..8d423bc649b9 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
247 if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) 247 if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
248 return -EPERM; 248 return -EPERM;
249 249
250 spin_lock_bh(&br->lock);
251 br_stp_set_bridge_priority(br, args[1]); 250 br_stp_set_bridge_priority(br, args[1]);
252 spin_unlock_bh(&br->lock);
253 return 0; 251 return 0;
254 252
255 case BRCTL_SET_PORT_PRIORITY: 253 case BRCTL_SET_PORT_PRIORITY:
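The ioctl hunk drops the explicit br->lock acquisition, evidently because br_stp_set_bridge_priority() now handles its own locking; taking the same non-recursive lock twice on one thread would deadlock. A pthread demonstration of why the double acquisition is fatal, using trylock so the failure is observable instead of hanging:

#include <pthread.h>
#include <stdio.h>

int main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&lock);		/* caller's acquisition */
	if (pthread_mutex_trylock(&lock) != 0)	/* callee's attempt */
		printf("second acquisition blocks: would deadlock\n");
	pthread_mutex_unlock(&lock);
	return 0;
}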
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index e29ad70b3000..c11cf2611db0 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
323 struct net_bridge_port_group *p; 323 struct net_bridge_port_group *p;
324 struct net_bridge_port_group __rcu **pp; 324 struct net_bridge_port_group __rcu **pp;
325 struct net_bridge_mdb_htable *mdb; 325 struct net_bridge_mdb_htable *mdb;
326 unsigned long now = jiffies;
326 int err; 327 int err;
327 328
328 mdb = mlock_dereference(br->mdb, br); 329 mdb = mlock_dereference(br->mdb, br);
@@ -347,6 +348,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
347 if (unlikely(!p)) 348 if (unlikely(!p))
348 return -ENOMEM; 349 return -ENOMEM;
349 rcu_assign_pointer(*pp, p); 350 rcu_assign_pointer(*pp, p);
351 if (state == MDB_TEMPORARY)
352 mod_timer(&p->timer, now + br->multicast_membership_interval);
350 353
351 br_mdb_notify(br->dev, port, group, RTM_NEWMDB); 354 br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
352 return 0; 355 return 0;
@@ -371,6 +374,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
371 if (!p || p->br != br || p->state == BR_STATE_DISABLED) 374 if (!p || p->br != br || p->state == BR_STATE_DISABLED)
372 return -EINVAL; 375 return -EINVAL;
373 376
377 memset(&ip, 0, sizeof(ip));
374 ip.proto = entry->addr.proto; 378 ip.proto = entry->addr.proto;
375 if (ip.proto == htons(ETH_P_IP)) 379 if (ip.proto == htons(ETH_P_IP))
376 ip.u.ip4 = entry->addr.u.ip4; 380 ip.u.ip4 = entry->addr.u.ip4;
@@ -417,20 +421,14 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
417 if (!netif_running(br->dev) || br->multicast_disabled) 421 if (!netif_running(br->dev) || br->multicast_disabled)
418 return -EINVAL; 422 return -EINVAL;
419 423
424 memset(&ip, 0, sizeof(ip));
420 ip.proto = entry->addr.proto; 425 ip.proto = entry->addr.proto;
421 if (ip.proto == htons(ETH_P_IP)) { 426 if (ip.proto == htons(ETH_P_IP))
422 if (timer_pending(&br->ip4_other_query.timer))
423 return -EBUSY;
424
425 ip.u.ip4 = entry->addr.u.ip4; 427 ip.u.ip4 = entry->addr.u.ip4;
426#if IS_ENABLED(CONFIG_IPV6) 428#if IS_ENABLED(CONFIG_IPV6)
427 } else { 429 else
428 if (timer_pending(&br->ip6_other_query.timer))
429 return -EBUSY;
430
431 ip.u.ip6 = entry->addr.u.ip6; 430 ip.u.ip6 = entry->addr.u.ip6;
432#endif 431#endif
433 }
434 432
435 spin_lock_bh(&br->multicast_lock); 433 spin_lock_bh(&br->multicast_lock);
436 mdb = mlock_dereference(br->mdb, br); 434 mdb = mlock_dereference(br->mdb, br);
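Both __br_mdb_add() and __br_mdb_del() now zero the br_ip key before filling it: with only one union member set, the remaining bytes held stack garbage, which breaks any lookup that hashes or compares the whole struct. (The same hunks also arm a membership timer for MDB_TEMPORARY entries and drop the -EBUSY bail-out on pending querier timers.) A freestanding illustration of the uninitialized-key problem, with a simplified stand-in for struct br_ip:

#include <stdio.h>
#include <string.h>

struct br_ip_sketch {
	unsigned short proto;
	union {
		unsigned int ip4;
		unsigned char ip6[16];
	} u;
	unsigned short vid;
};

int main(void)
{
	struct br_ip_sketch a, b;

	/* Without these memsets, the ip6 tail of the union and any
	 * padding would be indeterminate and memcmp() could differ
	 * for logically equal keys. */
	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));
	a.proto = b.proto = 0x0800;
	a.u.ip4 = b.u.ip4 = 0xe0000001;	/* 224.0.0.1 */

	printf("equal: %d\n", memcmp(&a, &b, sizeof(a)) == 0);
	return 0;
}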
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ff667e18b2d6..742a6c27d7a2 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,6 +37,8 @@
37 37
38static void br_multicast_start_querier(struct net_bridge *br, 38static void br_multicast_start_querier(struct net_bridge *br,
39 struct bridge_mcast_own_query *query); 39 struct bridge_mcast_own_query *query);
40static void br_multicast_add_router(struct net_bridge *br,
41 struct net_bridge_port *port);
40unsigned int br_mdb_rehash_seq; 42unsigned int br_mdb_rehash_seq;
41 43
42static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) 44static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -936,6 +938,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
936#if IS_ENABLED(CONFIG_IPV6) 938#if IS_ENABLED(CONFIG_IPV6)
937 br_multicast_enable(&port->ip6_own_query); 939 br_multicast_enable(&port->ip6_own_query);
938#endif 940#endif
941 if (port->multicast_router == 2 && hlist_unhashed(&port->rlist))
942 br_multicast_add_router(br, port);
939 943
940out: 944out:
941 spin_unlock(&br->multicast_lock); 945 spin_unlock(&br->multicast_lock);
@@ -975,9 +979,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
975 int err = 0; 979 int err = 0;
976 __be32 group; 980 __be32 group;
977 981
978 if (!pskb_may_pull(skb, sizeof(*ih)))
979 return -EINVAL;
980
981 ih = igmpv3_report_hdr(skb); 982 ih = igmpv3_report_hdr(skb);
982 num = ntohs(ih->ngrec); 983 num = ntohs(ih->ngrec);
983 len = sizeof(*ih); 984 len = sizeof(*ih);
@@ -1247,25 +1248,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1247 max_delay = 10 * HZ; 1248 max_delay = 10 * HZ;
1248 group = 0; 1249 group = 0;
1249 } 1250 }
1250 } else { 1251 } else if (skb->len >= sizeof(*ih3)) {
1251 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) {
1252 err = -EINVAL;
1253 goto out;
1254 }
1255
1256 ih3 = igmpv3_query_hdr(skb); 1252 ih3 = igmpv3_query_hdr(skb);
1257 if (ih3->nsrcs) 1253 if (ih3->nsrcs)
1258 goto out; 1254 goto out;
1259 1255
1260 max_delay = ih3->code ? 1256 max_delay = ih3->code ?
1261 IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; 1257 IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
1262 } 1258 } else {
1263
1264 /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer
1265 * all-systems destination addresses (224.0.0.1) for general queries
1266 */
1267 if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) {
1268 err = -EINVAL;
1269 goto out; 1259 goto out;
1270 } 1260 }
1271 1261
@@ -1328,12 +1318,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1328 (port && port->state == BR_STATE_DISABLED)) 1318 (port && port->state == BR_STATE_DISABLED))
1329 goto out; 1319 goto out;
1330 1320
1331 /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
1332 if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
1333 err = -EINVAL;
1334 goto out;
1335 }
1336
1337 if (skb->len == sizeof(*mld)) { 1321 if (skb->len == sizeof(*mld)) {
1338 if (!pskb_may_pull(skb, sizeof(*mld))) { 1322 if (!pskb_may_pull(skb, sizeof(*mld))) {
1339 err = -EINVAL; 1323 err = -EINVAL;
@@ -1357,14 +1341,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1357 1341
1358 is_general_query = group && ipv6_addr_any(group); 1342 is_general_query = group && ipv6_addr_any(group);
1359 1343
1360 /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
1361 * all-nodes destination address (ff02::1) for general queries
1362 */
1363 if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) {
1364 err = -EINVAL;
1365 goto out;
1366 }
1367
1368 if (is_general_query) { 1344 if (is_general_query) {
1369 saddr.proto = htons(ETH_P_IPV6); 1345 saddr.proto = htons(ETH_P_IPV6);
1370 saddr.u.ip6 = ip6h->saddr; 1346 saddr.u.ip6 = ip6h->saddr;
@@ -1556,74 +1532,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1556 struct sk_buff *skb, 1532 struct sk_buff *skb,
1557 u16 vid) 1533 u16 vid)
1558{ 1534{
1559 struct sk_buff *skb2 = skb; 1535 struct sk_buff *skb_trimmed = NULL;
1560 const struct iphdr *iph;
1561 struct igmphdr *ih; 1536 struct igmphdr *ih;
1562 unsigned int len;
1563 unsigned int offset;
1564 int err; 1537 int err;
1565 1538
1566 /* We treat OOM as packet loss for now. */ 1539 err = ip_mc_check_igmp(skb, &skb_trimmed);
1567 if (!pskb_may_pull(skb, sizeof(*iph)))
1568 return -EINVAL;
1569
1570 iph = ip_hdr(skb);
1571
1572 if (iph->ihl < 5 || iph->version != 4)
1573 return -EINVAL;
1574
1575 if (!pskb_may_pull(skb, ip_hdrlen(skb)))
1576 return -EINVAL;
1577
1578 iph = ip_hdr(skb);
1579 1540
1580 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 1541 if (err == -ENOMSG) {
1581 return -EINVAL; 1542 if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr))
1582
1583 if (iph->protocol != IPPROTO_IGMP) {
1584 if (!ipv4_is_local_multicast(iph->daddr))
1585 BR_INPUT_SKB_CB(skb)->mrouters_only = 1; 1543 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1586 return 0; 1544 return 0;
1545 } else if (err < 0) {
1546 return err;
1587 } 1547 }
1588 1548
1589 len = ntohs(iph->tot_len);
1590 if (skb->len < len || len < ip_hdrlen(skb))
1591 return -EINVAL;
1592
1593 if (skb->len > len) {
1594 skb2 = skb_clone(skb, GFP_ATOMIC);
1595 if (!skb2)
1596 return -ENOMEM;
1597
1598 err = pskb_trim_rcsum(skb2, len);
1599 if (err)
1600 goto err_out;
1601 }
1602
1603 len -= ip_hdrlen(skb2);
1604 offset = skb_network_offset(skb2) + ip_hdrlen(skb2);
1605 __skb_pull(skb2, offset);
1606 skb_reset_transport_header(skb2);
1607
1608 err = -EINVAL;
1609 if (!pskb_may_pull(skb2, sizeof(*ih)))
1610 goto out;
1611
1612 switch (skb2->ip_summed) {
1613 case CHECKSUM_COMPLETE:
1614 if (!csum_fold(skb2->csum))
1615 break;
1616 /* fall through */
1617 case CHECKSUM_NONE:
1618 skb2->csum = 0;
1619 if (skb_checksum_complete(skb2))
1620 goto out;
1621 }
1622
1623 err = 0;
1624
1625 BR_INPUT_SKB_CB(skb)->igmp = 1; 1549 BR_INPUT_SKB_CB(skb)->igmp = 1;
1626 ih = igmp_hdr(skb2); 1550 ih = igmp_hdr(skb);
1627 1551
1628 switch (ih->type) { 1552 switch (ih->type) {
1629 case IGMP_HOST_MEMBERSHIP_REPORT: 1553 case IGMP_HOST_MEMBERSHIP_REPORT:
@@ -1632,21 +1556,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1632 err = br_ip4_multicast_add_group(br, port, ih->group, vid); 1556 err = br_ip4_multicast_add_group(br, port, ih->group, vid);
1633 break; 1557 break;
1634 case IGMPV3_HOST_MEMBERSHIP_REPORT: 1558 case IGMPV3_HOST_MEMBERSHIP_REPORT:
1635 err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); 1559 err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid);
1636 break; 1560 break;
1637 case IGMP_HOST_MEMBERSHIP_QUERY: 1561 case IGMP_HOST_MEMBERSHIP_QUERY:
1638 err = br_ip4_multicast_query(br, port, skb2, vid); 1562 err = br_ip4_multicast_query(br, port, skb_trimmed, vid);
1639 break; 1563 break;
1640 case IGMP_HOST_LEAVE_MESSAGE: 1564 case IGMP_HOST_LEAVE_MESSAGE:
1641 br_ip4_multicast_leave_group(br, port, ih->group, vid); 1565 br_ip4_multicast_leave_group(br, port, ih->group, vid);
1642 break; 1566 break;
1643 } 1567 }
1644 1568
1645out: 1569 if (skb_trimmed)
1646 __skb_push(skb2, offset); 1570 kfree_skb(skb_trimmed);
1647err_out: 1571
1648 if (skb2 != skb)
1649 kfree_skb(skb2);
1650 return err; 1572 return err;
1651} 1573}
1652 1574
@@ -1656,138 +1578,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1656 struct sk_buff *skb, 1578 struct sk_buff *skb,
1657 u16 vid) 1579 u16 vid)
1658{ 1580{
1659 struct sk_buff *skb2; 1581 struct sk_buff *skb_trimmed = NULL;
1660 const struct ipv6hdr *ip6h; 1582 struct mld_msg *mld;
1661 u8 icmp6_type;
1662 u8 nexthdr;
1663 __be16 frag_off;
1664 unsigned int len;
1665 int offset;
1666 int err; 1583 int err;
1667 1584
1668 if (!pskb_may_pull(skb, sizeof(*ip6h))) 1585 err = ipv6_mc_check_mld(skb, &skb_trimmed);
1669 return -EINVAL;
1670
1671 ip6h = ipv6_hdr(skb);
1672
1673 /*
1674 * We're interested in MLD messages only.
1675 * - Version is 6
1676 * - MLD has always Router Alert hop-by-hop option
1677 * - But we do not support jumbrograms.
1678 */
1679 if (ip6h->version != 6)
1680 return 0;
1681 1586
1682 /* Prevent flooding this packet if there is no listener present */ 1587 if (err == -ENOMSG) {
1683 if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) 1588 if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
1684 BR_INPUT_SKB_CB(skb)->mrouters_only = 1; 1589 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1685
1686 if (ip6h->nexthdr != IPPROTO_HOPOPTS ||
1687 ip6h->payload_len == 0)
1688 return 0;
1689
1690 len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
1691 if (skb->len < len)
1692 return -EINVAL;
1693
1694 nexthdr = ip6h->nexthdr;
1695 offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off);
1696
1697 if (offset < 0 || nexthdr != IPPROTO_ICMPV6)
1698 return 0; 1590 return 0;
1699 1591 } else if (err < 0) {
1700 /* Okay, we found ICMPv6 header */ 1592 return err;
1701 skb2 = skb_clone(skb, GFP_ATOMIC);
1702 if (!skb2)
1703 return -ENOMEM;
1704
1705 err = -EINVAL;
1706 if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr)))
1707 goto out;
1708
1709 len -= offset - skb_network_offset(skb2);
1710
1711 __skb_pull(skb2, offset);
1712 skb_reset_transport_header(skb2);
1713 skb_postpull_rcsum(skb2, skb_network_header(skb2),
1714 skb_network_header_len(skb2));
1715
1716 icmp6_type = icmp6_hdr(skb2)->icmp6_type;
1717
1718 switch (icmp6_type) {
1719 case ICMPV6_MGM_QUERY:
1720 case ICMPV6_MGM_REPORT:
1721 case ICMPV6_MGM_REDUCTION:
1722 case ICMPV6_MLD2_REPORT:
1723 break;
1724 default:
1725 err = 0;
1726 goto out;
1727 }
1728
1729 /* Okay, we found MLD message. Check further. */
1730 if (skb2->len > len) {
1731 err = pskb_trim_rcsum(skb2, len);
1732 if (err)
1733 goto out;
1734 err = -EINVAL;
1735 }
1736
1737 ip6h = ipv6_hdr(skb2);
1738
1739 switch (skb2->ip_summed) {
1740 case CHECKSUM_COMPLETE:
1741 if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
1742 IPPROTO_ICMPV6, skb2->csum))
1743 break;
1744 /*FALLTHROUGH*/
1745 case CHECKSUM_NONE:
1746 skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
1747 &ip6h->daddr,
1748 skb2->len,
1749 IPPROTO_ICMPV6, 0));
1750 if (__skb_checksum_complete(skb2))
1751 goto out;
1752 } 1593 }
1753 1594
1754 err = 0;
1755
1756 BR_INPUT_SKB_CB(skb)->igmp = 1; 1595 BR_INPUT_SKB_CB(skb)->igmp = 1;
1596 mld = (struct mld_msg *)skb_transport_header(skb);
1757 1597
1758 switch (icmp6_type) { 1598 switch (mld->mld_type) {
1759 case ICMPV6_MGM_REPORT: 1599 case ICMPV6_MGM_REPORT:
1760 {
1761 struct mld_msg *mld;
1762 if (!pskb_may_pull(skb2, sizeof(*mld))) {
1763 err = -EINVAL;
1764 goto out;
1765 }
1766 mld = (struct mld_msg *)skb_transport_header(skb2);
1767 BR_INPUT_SKB_CB(skb)->mrouters_only = 1; 1600 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1768 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); 1601 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid);
1769 break; 1602 break;
1770 }
1771 case ICMPV6_MLD2_REPORT: 1603 case ICMPV6_MLD2_REPORT:
1772 err = br_ip6_multicast_mld2_report(br, port, skb2, vid); 1604 err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid);
1773 break; 1605 break;
1774 case ICMPV6_MGM_QUERY: 1606 case ICMPV6_MGM_QUERY:
1775 err = br_ip6_multicast_query(br, port, skb2, vid); 1607 err = br_ip6_multicast_query(br, port, skb_trimmed, vid);
1776 break; 1608 break;
1777 case ICMPV6_MGM_REDUCTION: 1609 case ICMPV6_MGM_REDUCTION:
1778 {
1779 struct mld_msg *mld;
1780 if (!pskb_may_pull(skb2, sizeof(*mld))) {
1781 err = -EINVAL;
1782 goto out;
1783 }
1784 mld = (struct mld_msg *)skb_transport_header(skb2);
1785 br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); 1610 br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid);
1786 } 1611 break;
1787 } 1612 }
1788 1613
1789out: 1614 if (skb_trimmed)
1790 kfree_skb(skb2); 1615 kfree_skb(skb_trimmed);
1616
1791 return err; 1617 return err;
1792} 1618}
1793#endif 1619#endif
@@ -1949,11 +1775,9 @@ out:
1949 1775
1950int br_multicast_set_router(struct net_bridge *br, unsigned long val) 1776int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1951{ 1777{
1952 int err = -ENOENT; 1778 int err = -EINVAL;
1953 1779
1954 spin_lock_bh(&br->multicast_lock); 1780 spin_lock_bh(&br->multicast_lock);
1955 if (!netif_running(br->dev))
1956 goto unlock;
1957 1781
1958 switch (val) { 1782 switch (val) {
1959 case 0: 1783 case 0:
@@ -1964,13 +1788,8 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1964 br->multicast_router = val; 1788 br->multicast_router = val;
1965 err = 0; 1789 err = 0;
1966 break; 1790 break;
1967
1968 default:
1969 err = -EINVAL;
1970 break;
1971 } 1791 }
1972 1792
1973unlock:
1974 spin_unlock_bh(&br->multicast_lock); 1793 spin_unlock_bh(&br->multicast_lock);
1975 1794
1976 return err; 1795 return err;
@@ -1979,11 +1798,9 @@ unlock:
1979int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) 1798int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
1980{ 1799{
1981 struct net_bridge *br = p->br; 1800 struct net_bridge *br = p->br;
1982 int err = -ENOENT; 1801 int err = -EINVAL;
1983 1802
1984 spin_lock(&br->multicast_lock); 1803 spin_lock(&br->multicast_lock);
1985 if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED)
1986 goto unlock;
1987 1804
1988 switch (val) { 1805 switch (val) {
1989 case 0: 1806 case 0:
@@ -2005,13 +1822,8 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
2005 1822
2006 br_multicast_add_router(br, p); 1823 br_multicast_add_router(br, p);
2007 break; 1824 break;
2008
2009 default:
2010 err = -EINVAL;
2011 break;
2012 } 1825 }
2013 1826
2014unlock:
2015 spin_unlock(&br->multicast_lock); 1827 spin_unlock(&br->multicast_lock);
2016 1828
2017 return err; 1829 return err;
@@ -2116,15 +1928,11 @@ unlock:
2116 1928
2117int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) 1929int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
2118{ 1930{
2119 int err = -ENOENT; 1931 int err = -EINVAL;
2120 u32 old; 1932 u32 old;
2121 struct net_bridge_mdb_htable *mdb; 1933 struct net_bridge_mdb_htable *mdb;
2122 1934
2123 spin_lock_bh(&br->multicast_lock); 1935 spin_lock_bh(&br->multicast_lock);
2124 if (!netif_running(br->dev))
2125 goto unlock;
2126
2127 err = -EINVAL;
2128 if (!is_power_of_2(val)) 1936 if (!is_power_of_2(val))
2129 goto unlock; 1937 goto unlock;
2130 1938
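The IPv4/IPv6 snooping rewrite delegates validation to ip_mc_check_igmp() and ipv6_mc_check_mld(), which hand back a trimmed copy when needed and use a tri-state result: 0 for a valid IGMP/MLD message, -ENOMSG for a well-formed packet that simply is not IGMP/MLD, and any other negative value for a malformed packet. A sketch of consuming that convention; the string matching is a stand-in for real packet parsing:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for ip_mc_check_igmp(): 0, -ENOMSG, or another error. */
static int check_igmp_sketch(const char *pkt)
{
	if (strcmp(pkt, "igmp") == 0)
		return 0;
	if (strcmp(pkt, "udp") == 0)
		return -ENOMSG;
	return -EINVAL;
}

static int rcv(const char *pkt)
{
	int err = check_igmp_sketch(pkt);

	if (err == -ENOMSG)
		return 0;		/* forward normally */
	else if (err < 0)
		return err;		/* malformed, propagate */
	printf("snoop %s\n", pkt);	/* run the IGMP state machine */
	return 0;
}

int main(void)
{
	printf("%d %d %d\n", rcv("igmp"), rcv("udp"), rcv("junk"));
	return 0;
}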
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter_hooks.c
index 60ddfbeb47f5..c8b9bcfe997e 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -34,6 +34,7 @@
34 34
35#include <net/ip.h> 35#include <net/ip.h>
36#include <net/ipv6.h> 36#include <net/ipv6.h>
37#include <net/addrconf.h>
37#include <net/route.h> 38#include <net/route.h>
38#include <net/netfilter/br_netfilter.h> 39#include <net/netfilter/br_netfilter.h>
39 40
@@ -110,27 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
110/* largest possible L2 header, see br_nf_dev_queue_xmit() */ 111/* largest possible L2 header, see br_nf_dev_queue_xmit() */
111#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 112#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
112 113
113#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 114#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
114struct brnf_frag_data { 115struct brnf_frag_data {
115 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 116 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
116 u8 encap_size; 117 u8 encap_size;
117 u8 size; 118 u8 size;
119 u16 vlan_tci;
120 __be16 vlan_proto;
118}; 121};
119 122
120static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 123static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
121#endif 124#endif
122 125
123static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) 126static void nf_bridge_info_free(struct sk_buff *skb)
124{ 127{
125 return skb->nf_bridge; 128 if (skb->nf_bridge) {
126} 129 nf_bridge_put(skb->nf_bridge);
127 130 skb->nf_bridge = NULL;
128static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) 131 }
129{
130 struct net_bridge_port *port;
131
132 port = br_port_get_rcu(dev);
133 return port ? &port->br->fake_rtable : NULL;
134} 132}
135 133
136static inline struct net_device *bridge_parent(const struct net_device *dev) 134static inline struct net_device *bridge_parent(const struct net_device *dev)
@@ -141,15 +139,6 @@ static inline struct net_device *bridge_parent(const struct net_device *dev)
141 return port ? port->br->dev : NULL; 139 return port ? port->br->dev : NULL;
142} 140}
143 141
144static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
145{
146 skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
147 if (likely(skb->nf_bridge))
148 atomic_set(&(skb->nf_bridge->use), 1);
149
150 return skb->nf_bridge;
151}
152
153static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 142static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
154{ 143{
155 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 144 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
@@ -167,7 +156,7 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
167 return nf_bridge; 156 return nf_bridge;
168} 157}
169 158
170static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 159unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
171{ 160{
172 switch (skb->protocol) { 161 switch (skb->protocol) {
173 case __cpu_to_be16(ETH_P_8021Q): 162 case __cpu_to_be16(ETH_P_8021Q):
@@ -179,14 +168,6 @@ static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
179 } 168 }
180} 169}
181 170
182static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
183{
184 unsigned int len = nf_bridge_encap_header_len(skb);
185
186 skb_push(skb, len);
187 skb->network_header -= len;
188}
189
190static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 171static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
191{ 172{
192 unsigned int len = nf_bridge_encap_header_len(skb); 173 unsigned int len = nf_bridge_encap_header_len(skb);
@@ -208,7 +189,7 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
208 * expected format 189 * expected format
209 */ 190 */
210 191
211static int br_parse_ip_options(struct sk_buff *skb) 192static int br_validate_ipv4(struct sk_buff *skb)
212{ 193{
213 const struct iphdr *iph; 194 const struct iphdr *iph;
214 struct net_device *dev = skb->dev; 195 struct net_device *dev = skb->dev;
@@ -256,7 +237,7 @@ drop:
256 return -1; 237 return -1;
257} 238}
258 239
259static void nf_bridge_update_protocol(struct sk_buff *skb) 240void nf_bridge_update_protocol(struct sk_buff *skb)
260{ 241{
261 switch (skb->nf_bridge->orig_proto) { 242 switch (skb->nf_bridge->orig_proto) {
262 case BRNF_PROTO_8021Q: 243 case BRNF_PROTO_8021Q:
@@ -270,43 +251,12 @@ static void nf_bridge_update_protocol(struct sk_buff *skb)
270 } 251 }
271} 252}
272 253
273/* PF_BRIDGE/PRE_ROUTING *********************************************/
274/* Undo the changes made for ip6tables PREROUTING and continue the
275 * bridge PRE_ROUTING hook. */
276static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
277{
278 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
279 struct rtable *rt;
280
281 if (nf_bridge->pkt_otherhost) {
282 skb->pkt_type = PACKET_OTHERHOST;
283 nf_bridge->pkt_otherhost = false;
284 }
285 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
286
287 rt = bridge_parent_rtable(nf_bridge->physindev);
288 if (!rt) {
289 kfree_skb(skb);
290 return 0;
291 }
292 skb_dst_set_noref(skb, &rt->dst);
293
294 skb->dev = nf_bridge->physindev;
295 nf_bridge_update_protocol(skb);
296 nf_bridge_push_encap_header(skb);
297 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
298 skb->dev, NULL,
299 br_handle_frame_finish, 1);
300
301 return 0;
302}
303
304/* Obtain the correct destination MAC address, while preserving the original 254/* Obtain the correct destination MAC address, while preserving the original
305 * source MAC address. If we already know this address, we just copy it. If we 255 * source MAC address. If we already know this address, we just copy it. If we
306 * don't, we use the neighbour framework to find out. In both cases, we make 256 * don't, we use the neighbour framework to find out. In both cases, we make
307 * sure that br_handle_frame_finish() is called afterwards. 257 * sure that br_handle_frame_finish() is called afterwards.
308 */ 258 */
309static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) 259int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
310{ 260{
311 struct neighbour *neigh; 261 struct neighbour *neigh;
312 struct dst_entry *dst; 262 struct dst_entry *dst;
@@ -346,8 +296,9 @@ free_skb:
346 return 0; 296 return 0;
347} 297}
348 298
349static bool daddr_was_changed(const struct sk_buff *skb, 299static inline bool
350 const struct nf_bridge_info *nf_bridge) 300br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
301 const struct nf_bridge_info *nf_bridge)
351{ 302{
352 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; 303 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
353} 304}
@@ -398,17 +349,15 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
398 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 349 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
399 struct rtable *rt; 350 struct rtable *rt;
400 int err; 351 int err;
401 int frag_max_size;
402 352
403 frag_max_size = IPCB(skb)->frag_max_size; 353 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
404 BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
405 354
406 if (nf_bridge->pkt_otherhost) { 355 if (nf_bridge->pkt_otherhost) {
407 skb->pkt_type = PACKET_OTHERHOST; 356 skb->pkt_type = PACKET_OTHERHOST;
408 nf_bridge->pkt_otherhost = false; 357 nf_bridge->pkt_otherhost = false;
409 } 358 }
410 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 359 nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
411 if (daddr_was_changed(skb, nf_bridge)) { 360 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
412 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 361 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
413 struct in_device *in_dev = __in_dev_get_rcu(dev); 362 struct in_device *in_dev = __in_dev_get_rcu(dev);
414 363
@@ -486,7 +435,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
486} 435}
487 436
488/* Some common code for IPv4/IPv6 */ 437/* Some common code for IPv4/IPv6 */
489static struct net_device *setup_pre_routing(struct sk_buff *skb) 438struct net_device *setup_pre_routing(struct sk_buff *skb)
490{ 439{
491 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 440 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
492 441
@@ -509,106 +458,6 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
509 return skb->dev; 458 return skb->dev;
510} 459}
511 460
512/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
513static int check_hbh_len(struct sk_buff *skb)
514{
515 unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
516 u32 pkt_len;
517 const unsigned char *nh = skb_network_header(skb);
518 int off = raw - nh;
519 int len = (raw[1] + 1) << 3;
520
521 if ((raw + len) - skb->data > skb_headlen(skb))
522 goto bad;
523
524 off += 2;
525 len -= 2;
526
527 while (len > 0) {
528 int optlen = nh[off + 1] + 2;
529
530 switch (nh[off]) {
531 case IPV6_TLV_PAD1:
532 optlen = 1;
533 break;
534
535 case IPV6_TLV_PADN:
536 break;
537
538 case IPV6_TLV_JUMBO:
539 if (nh[off + 1] != 4 || (off & 3) != 2)
540 goto bad;
541 pkt_len = ntohl(*(__be32 *) (nh + off + 2));
542 if (pkt_len <= IPV6_MAXPLEN ||
543 ipv6_hdr(skb)->payload_len)
544 goto bad;
545 if (pkt_len > skb->len - sizeof(struct ipv6hdr))
546 goto bad;
547 if (pskb_trim_rcsum(skb,
548 pkt_len + sizeof(struct ipv6hdr)))
549 goto bad;
550 nh = skb_network_header(skb);
551 break;
552 default:
553 if (optlen > len)
554 goto bad;
555 break;
556 }
557 off += optlen;
558 len -= optlen;
559 }
560 if (len == 0)
561 return 0;
562bad:
563 return -1;
564
565}
566
567/* Replicate the checks that IPv6 does on packet reception and pass the packet
568 * to ip6tables, which doesn't support NAT, so things are fairly simple. */
569static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
570 struct sk_buff *skb,
571 const struct nf_hook_state *state)
572{
573 const struct ipv6hdr *hdr;
574 u32 pkt_len;
575
576 if (skb->len < sizeof(struct ipv6hdr))
577 return NF_DROP;
578
579 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
580 return NF_DROP;
581
582 hdr = ipv6_hdr(skb);
583
584 if (hdr->version != 6)
585 return NF_DROP;
586
587 pkt_len = ntohs(hdr->payload_len);
588
589 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
590 if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
591 return NF_DROP;
592 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
593 return NF_DROP;
594 }
595 if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
596 return NF_DROP;
597
598 nf_bridge_put(skb->nf_bridge);
599 if (!nf_bridge_alloc(skb))
600 return NF_DROP;
601 if (!setup_pre_routing(skb))
602 return NF_DROP;
603
604 skb->protocol = htons(ETH_P_IPV6);
605 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
606 skb->dev, NULL,
607 br_nf_pre_routing_finish_ipv6);
608
609 return NF_STOLEN;
610}
611
612/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 461/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
613 * Replicate the checks that IPv4 does on packet reception. 462 * Replicate the checks that IPv4 does on packet reception.
614 * Set skb->dev to the bridge device (i.e. parent of the 463 * Set skb->dev to the bridge device (i.e. parent of the
@@ -648,7 +497,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
648 497
649 nf_bridge_pull_encap_header_rcsum(skb); 498 nf_bridge_pull_encap_header_rcsum(skb);
650 499
651 if (br_parse_ip_options(skb)) 500 if (br_validate_ipv4(skb))
652 return NF_DROP; 501 return NF_DROP;
653 502
654 nf_bridge_put(skb->nf_bridge); 503 nf_bridge_put(skb->nf_bridge);
@@ -692,12 +541,12 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
692 struct net_device *in; 541 struct net_device *in;
693 542
694 if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { 543 if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
695 int frag_max_size;
696 544
697 if (skb->protocol == htons(ETH_P_IP)) { 545 if (skb->protocol == htons(ETH_P_IP))
698 frag_max_size = IPCB(skb)->frag_max_size; 546 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
699 BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; 547
700 } 548 if (skb->protocol == htons(ETH_P_IPV6))
549 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
701 550
702 in = nf_bridge->physindev; 551 in = nf_bridge->physindev;
703 if (nf_bridge->pkt_otherhost) { 552 if (nf_bridge->pkt_otherhost) {
@@ -760,12 +609,15 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
760 } 609 }
761 610
762 if (pf == NFPROTO_IPV4) { 611 if (pf == NFPROTO_IPV4) {
763 int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; 612 if (br_validate_ipv4(skb))
764
765 if (br_parse_ip_options(skb))
766 return NF_DROP; 613 return NF_DROP;
614 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
615 }
767 616
768 IPCB(skb)->frag_max_size = frag_max; 617 if (pf == NFPROTO_IPV6) {
618 if (br_validate_ipv6(skb))
619 return NF_DROP;
620 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
769 } 621 }
770 622
771 nf_bridge->physoutdev = skb->dev; 623 nf_bridge->physoutdev = skb->dev;
@@ -815,7 +667,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
815 return NF_STOLEN; 667 return NF_STOLEN;
816} 668}
817 669
818#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 670#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
819static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) 671static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
820{ 672{
821 struct brnf_frag_data *data; 673 struct brnf_frag_data *data;
@@ -829,56 +681,120 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
829 return 0; 681 return 0;
830 } 682 }
831 683
684 if (data->vlan_tci) {
685 skb->vlan_tci = data->vlan_tci;
686 skb->vlan_proto = data->vlan_proto;
687 }
688
832 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 689 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
833 __skb_push(skb, data->encap_size); 690 __skb_push(skb, data->encap_size);
834 691
692 nf_bridge_info_free(skb);
835 return br_dev_queue_push_xmit(sk, skb); 693 return br_dev_queue_push_xmit(sk, skb);
836} 694}
695#endif
696
697#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
698static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
699 int (*output)(struct sock *, struct sk_buff *))
700{
701 unsigned int mtu = ip_skb_dst_mtu(skb);
702 struct iphdr *iph = ip_hdr(skb);
703 struct rtable *rt = skb_rtable(skb);
704 struct net_device *dev = rt->dst.dev;
705
706 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
707 (IPCB(skb)->frag_max_size &&
708 IPCB(skb)->frag_max_size > mtu))) {
709 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
710 kfree_skb(skb);
711 return -EMSGSIZE;
712 }
713
714 return ip_do_fragment(sk, skb, output);
715}
716#endif
717
718static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
719{
720 if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
721 return PPPOE_SES_HLEN;
722 return 0;
723}
837 724
838static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) 725static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
839{ 726{
840 int ret; 727 struct nf_bridge_info *nf_bridge;
841 int frag_max_size;
842 unsigned int mtu_reserved; 728 unsigned int mtu_reserved;
843 729
844 if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) 730 mtu_reserved = nf_bridge_mtu_reduction(skb);
731
732 if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
733 nf_bridge_info_free(skb);
845 return br_dev_queue_push_xmit(sk, skb); 734 return br_dev_queue_push_xmit(sk, skb);
735 }
846 736
847 mtu_reserved = nf_bridge_mtu_reduction(skb); 737 nf_bridge = nf_bridge_info_get(skb);
738
739#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
848 /* This is wrong! We should preserve the original fragment 740 /* This is wrong! We should preserve the original fragment
849 * boundaries by preserving frag_list rather than refragmenting. 741 * boundaries by preserving frag_list rather than refragmenting.
850 */ 742 */
851 if (skb->len + mtu_reserved > skb->dev->mtu) { 743 if (skb->protocol == htons(ETH_P_IP)) {
852 struct brnf_frag_data *data; 744 struct brnf_frag_data *data;
853 745
854 frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 746 if (br_validate_ipv4(skb))
855 if (br_parse_ip_options(skb)) 747 goto drop;
856 /* Drop invalid packet */ 748
857 return NF_DROP; 749 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
858 IPCB(skb)->frag_max_size = frag_max_size;
859 750
860 nf_bridge_update_protocol(skb); 751 nf_bridge_update_protocol(skb);
861 752
862 data = this_cpu_ptr(&brnf_frag_data_storage); 753 data = this_cpu_ptr(&brnf_frag_data_storage);
754
755 data->vlan_tci = skb->vlan_tci;
756 data->vlan_proto = skb->vlan_proto;
863 data->encap_size = nf_bridge_encap_header_len(skb); 757 data->encap_size = nf_bridge_encap_header_len(skb);
864 data->size = ETH_HLEN + data->encap_size; 758 data->size = ETH_HLEN + data->encap_size;
865 759
866 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 760 skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
867 data->size); 761 data->size);
868 762
869 ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); 763 return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
870 } else {
871 ret = br_dev_queue_push_xmit(sk, skb);
872 } 764 }
765#endif
766#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
767 if (skb->protocol == htons(ETH_P_IPV6)) {
768 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
769 struct brnf_frag_data *data;
873 770
874 return ret; 771 if (br_validate_ipv6(skb))
875} 772 goto drop;
876#else 773
877static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) 774 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
878{ 775
879 return br_dev_queue_push_xmit(sk, skb); 776 nf_bridge_update_protocol(skb);
880} 777
778 data = this_cpu_ptr(&brnf_frag_data_storage);
779 data->encap_size = nf_bridge_encap_header_len(skb);
780 data->size = ETH_HLEN + data->encap_size;
781
782 skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
783 data->size);
784
785 if (v6ops)
786 return v6ops->fragment(sk, skb, br_nf_push_frag_xmit);
787
788 kfree_skb(skb);
789 return -EMSGSIZE;
790 }
881#endif 791#endif
792 nf_bridge_info_free(skb);
793 return br_dev_queue_push_xmit(sk, skb);
794 drop:
795 kfree_skb(skb);
796 return 0;
797}
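
For orientation, the refactored br_nf_dev_queue_xmit() above stashes the Ethernet/VLAN/PPPoE header in the per-CPU brnf_frag_data_storage slot before handing the packet to the fragmenter, and br_nf_push_frag_xmit() (top of this hunk) restores it onto each fragment. Below is a minimal userspace sketch of that stash-and-restore pattern; every name in it is a stand-in, not kernel code.

    /* Userspace model of the stash-and-restore flow: save the
     * link-layer header to scratch space, fragment, then have the
     * output callback push the header back onto every fragment. */
    #include <stdio.h>
    #include <string.h>

    struct frag_data_like {              /* models struct brnf_frag_data */
        char mac[64];
        size_t size;
    };

    static struct frag_data_like scratch;    /* models this_cpu_ptr() slot */

    static int push_frag_xmit(const char *frag)
    {
        printf("xmit %s%s\n", scratch.mac, frag); /* header re-pushed */
        return 0;
    }

    static int queue_xmit(const char *hdr, const char *payload,
                          int (*output)(const char *))
    {
        snprintf(scratch.mac, sizeof(scratch.mac), "[%s]", hdr);
        scratch.size = strlen(scratch.mac);
        return output(payload);    /* stands in for br_nf_ip_fragment() */
    }

    int main(void)
    {
        return queue_xmit("eth+vlan", "payload", push_frag_xmit);
    }
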
882 798
883/* PF_BRIDGE/POST_ROUTING ********************************************/ 799/* PF_BRIDGE/POST_ROUTING ********************************************/
884static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, 800static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
@@ -964,6 +880,8 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
964 nf_bridge->neigh_header, 880 nf_bridge->neigh_header,
965 ETH_HLEN - ETH_ALEN); 881 ETH_HLEN - ETH_ALEN);
966 skb->dev = nf_bridge->physindev; 882 skb->dev = nf_bridge->physindev;
883
884 nf_bridge->physoutdev = NULL;
967 br_handle_frame_finish(NULL, skb); 885 br_handle_frame_finish(NULL, skb);
968} 886}
969 887
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
new file mode 100644
index 000000000000..13b7d1e3d185
--- /dev/null
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -0,0 +1,245 @@
1/*
2 * Handle firewalling
3 * Linux ethernet bridge
4 *
5 * Authors:
6 * Lennert Buytenhek <buytenh@gnu.org>
7 * Bart De Schuymer <bdschuym@pandora.be>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 *
14 * Lennert dedicates this file to Kerstin Wurdinger.
15 */
16
17#include <linux/module.h>
18#include <linux/kernel.h>
19#include <linux/slab.h>
20#include <linux/ip.h>
21#include <linux/netdevice.h>
22#include <linux/skbuff.h>
23#include <linux/if_arp.h>
24#include <linux/if_ether.h>
25#include <linux/if_vlan.h>
26#include <linux/if_pppox.h>
27#include <linux/ppp_defs.h>
28#include <linux/netfilter_bridge.h>
29#include <linux/netfilter_ipv4.h>
30#include <linux/netfilter_ipv6.h>
31#include <linux/netfilter_arp.h>
32#include <linux/in_route.h>
33#include <linux/inetdevice.h>
34
35#include <net/ip.h>
36#include <net/ipv6.h>
37#include <net/addrconf.h>
38#include <net/route.h>
39#include <net/netfilter/br_netfilter.h>
40
41#include <asm/uaccess.h>
42#include "br_private.h"
43#ifdef CONFIG_SYSCTL
44#include <linux/sysctl.h>
45#endif
46
47/* We only check the length. A bridge shouldn't do any hop-by-hop stuff
48 * anyway
49 */
50static int br_nf_check_hbh_len(struct sk_buff *skb)
51{
52 unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
53 u32 pkt_len;
54 const unsigned char *nh = skb_network_header(skb);
55 int off = raw - nh;
56 int len = (raw[1] + 1) << 3;
57
58 if ((raw + len) - skb->data > skb_headlen(skb))
59 goto bad;
60
61 off += 2;
62 len -= 2;
63
64 while (len > 0) {
65 int optlen = nh[off + 1] + 2;
66
67 switch (nh[off]) {
68 case IPV6_TLV_PAD1:
69 optlen = 1;
70 break;
71
72 case IPV6_TLV_PADN:
73 break;
74
75 case IPV6_TLV_JUMBO:
76 if (nh[off + 1] != 4 || (off & 3) != 2)
77 goto bad;
78 pkt_len = ntohl(*(__be32 *)(nh + off + 2));
79 if (pkt_len <= IPV6_MAXPLEN ||
80 ipv6_hdr(skb)->payload_len)
81 goto bad;
82 if (pkt_len > skb->len - sizeof(struct ipv6hdr))
83 goto bad;
84 if (pskb_trim_rcsum(skb,
85 pkt_len + sizeof(struct ipv6hdr)))
86 goto bad;
87 nh = skb_network_header(skb);
88 break;
89 default:
90 if (optlen > len)
91 goto bad;
92 break;
93 }
94 off += optlen;
95 len -= optlen;
96 }
97 if (len == 0)
98 return 0;
99bad:
100 return -1;
101}
102
103int br_validate_ipv6(struct sk_buff *skb)
104{
105 const struct ipv6hdr *hdr;
106 struct net_device *dev = skb->dev;
107 struct inet6_dev *idev = __in6_dev_get(skb->dev);
108 u32 pkt_len;
109 u8 ip6h_len = sizeof(struct ipv6hdr);
110
111 if (!pskb_may_pull(skb, ip6h_len))
112 goto inhdr_error;
113
114 if (skb->len < ip6h_len)
115 goto drop;
116
117 hdr = ipv6_hdr(skb);
118
119 if (hdr->version != 6)
120 goto inhdr_error;
121
122 pkt_len = ntohs(hdr->payload_len);
123
124 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
125 if (pkt_len + ip6h_len > skb->len) {
126 IP6_INC_STATS_BH(dev_net(dev), idev,
127 IPSTATS_MIB_INTRUNCATEDPKTS);
128 goto drop;
129 }
130 if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
131 IP6_INC_STATS_BH(dev_net(dev), idev,
132 IPSTATS_MIB_INDISCARDS);
133 goto drop;
134 }
135 }
136 if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
137 goto drop;
138
139 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
140 /* No IP options in IPv6 header; however it should be
141 * checked if some next headers need special treatment
142 */
143 return 0;
144
145inhdr_error:
146 IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
147drop:
148 return -1;
149}
150
151static inline bool
152br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb,
153 const struct nf_bridge_info *nf_bridge)
154{
155 return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr,
156 sizeof(ipv6_hdr(skb)->daddr)) != 0;
157}
158
159/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables
160 * PREROUTING and continue the bridge PRE_ROUTING hook. See comment
161 * for br_nf_pre_routing_finish(), same logic is used here but
162 * equivalent IPv6 function ip6_route_input() called indirectly.
163 */
164static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
165{
166 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
167 struct rtable *rt;
168 struct net_device *dev = skb->dev;
169 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
170
171 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
172
173 if (nf_bridge->pkt_otherhost) {
174 skb->pkt_type = PACKET_OTHERHOST;
175 nf_bridge->pkt_otherhost = false;
176 }
177 nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
178 if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
179 skb_dst_drop(skb);
180 v6ops->route_input(skb);
181
182 if (skb_dst(skb)->error) {
183 kfree_skb(skb);
184 return 0;
185 }
186
187 if (skb_dst(skb)->dev == dev) {
188 skb->dev = nf_bridge->physindev;
189 nf_bridge_update_protocol(skb);
190 nf_bridge_push_encap_header(skb);
191 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
192 sk, skb, skb->dev, NULL,
193 br_nf_pre_routing_finish_bridge,
194 1);
195 return 0;
196 }
197 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
198 skb->pkt_type = PACKET_HOST;
199 } else {
200 rt = bridge_parent_rtable(nf_bridge->physindev);
201 if (!rt) {
202 kfree_skb(skb);
203 return 0;
204 }
205 skb_dst_set_noref(skb, &rt->dst);
206 }
207
208 skb->dev = nf_bridge->physindev;
209 nf_bridge_update_protocol(skb);
210 nf_bridge_push_encap_header(skb);
211 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
212 skb->dev, NULL,
213 br_handle_frame_finish, 1);
214
215 return 0;
216}
217
218/* Replicate the checks that IPv6 does on packet reception and pass the packet
219 * to ip6tables.
220 */
221unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
222 struct sk_buff *skb,
223 const struct nf_hook_state *state)
224{
225 struct nf_bridge_info *nf_bridge;
226
227 if (br_validate_ipv6(skb))
228 return NF_DROP;
229
230 nf_bridge_put(skb->nf_bridge);
231 if (!nf_bridge_alloc(skb))
232 return NF_DROP;
233 if (!setup_pre_routing(skb))
234 return NF_DROP;
235
236 nf_bridge = nf_bridge_info_get(skb);
237 nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
238
239 skb->protocol = htons(ETH_P_IPV6);
240 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
241 skb->dev, NULL,
242 br_nf_pre_routing_finish_ipv6);
243
244 return NF_STOLEN;
245}
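
The heart of br_nf_pre_routing_finish_ipv6() above is the destination comparison: the address saved in nf_bridge before ip6tables ran is checked against the current header, and a mismatch means DNAT rewrote the packet, so it must be re-routed rather than bridged straight through. A minimal sketch of that check (types are stand-ins):

    #include <stdbool.h>
    #include <string.h>

    struct in6_like { unsigned char s6_addr[16]; };

    /* saved = snapshot taken before ip6tables ran
     * (models nf_bridge->ipv6_daddr); now = current header daddr. */
    static bool daddr_was_changed(const struct in6_like *saved,
                                  const struct in6_like *now)
    {
        return memcmp(saved, now, sizeof(*now)) != 0;
    }
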
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4b5c236998ff..364bdc98bd9b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -457,6 +457,8 @@ static int br_afspec(struct net_bridge *br,
457 if (nla_len(attr) != sizeof(struct bridge_vlan_info)) 457 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
458 return -EINVAL; 458 return -EINVAL;
459 vinfo = nla_data(attr); 459 vinfo = nla_data(attr);
460 if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
461 return -EINVAL;
460 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { 462 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
461 if (vinfo_start) 463 if (vinfo_start)
462 return -EINVAL; 464 return -EINVAL;
@@ -586,7 +588,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
586 struct nlattr *afspec; 588 struct nlattr *afspec;
587 struct net_bridge_port *p; 589 struct net_bridge_port *p;
588 struct nlattr *tb[IFLA_BRPORT_MAX + 1]; 590 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
589 int err = 0, ret_offload = 0; 591 int err = 0;
590 592
591 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); 593 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
592 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 594 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -628,16 +630,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
628 afspec, RTM_SETLINK); 630 afspec, RTM_SETLINK);
629 } 631 }
630 632
631 if (p && !(flags & BRIDGE_FLAGS_SELF)) {
632 /* set bridge attributes in hardware if supported
633 */
634 ret_offload = netdev_switch_port_bridge_setlink(dev, nlh,
635 flags);
636 if (ret_offload && ret_offload != -EOPNOTSUPP)
637 br_warn(p->br, "error setting attrs on port %u(%s)\n",
638 (unsigned int)p->port_no, p->dev->name);
639 }
640
641 if (err == 0) 633 if (err == 0)
642 br_ifinfo_notify(RTM_NEWLINK, p); 634 br_ifinfo_notify(RTM_NEWLINK, p);
643out: 635out:
@@ -649,7 +641,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
649{ 641{
650 struct nlattr *afspec; 642 struct nlattr *afspec;
651 struct net_bridge_port *p; 643 struct net_bridge_port *p;
652 int err = 0, ret_offload = 0; 644 int err = 0;
653 645
654 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 646 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
655 if (!afspec) 647 if (!afspec)
@@ -668,16 +660,6 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
668 */ 660 */
669 br_ifinfo_notify(RTM_NEWLINK, p); 661 br_ifinfo_notify(RTM_NEWLINK, p);
670 662
671 if (p && !(flags & BRIDGE_FLAGS_SELF)) {
672 /* del bridge attributes in hardware
673 */
674 ret_offload = netdev_switch_port_bridge_dellink(dev, nlh,
675 flags);
676 if (ret_offload && ret_offload != -EOPNOTSUPP)
677 br_warn(p->br, "error deleting attrs on port %u (%s)\n",
678 (unsigned int)p->port_no, p->dev->name);
679 }
680
681 return err; 663 return err;
682} 664}
683static int br_validate(struct nlattr *tb[], struct nlattr *data[]) 665static int br_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3362c29400f1..8b21146b24a0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,6 +18,7 @@
18#include <linux/netpoll.h> 18#include <linux/netpoll.h>
19#include <linux/u64_stats_sync.h> 19#include <linux/u64_stats_sync.h>
20#include <net/route.h> 20#include <net/route.h>
21#include <net/ip6_fib.h>
21#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
22 23
23#define BR_HASH_BITS 8 24#define BR_HASH_BITS 8
@@ -33,8 +34,8 @@
33 34
34/* Control of forwarding link local multicast */ 35/* Control of forwarding link local multicast */
35#define BR_GROUPFWD_DEFAULT 0 36#define BR_GROUPFWD_DEFAULT 0
36/* Don't allow forwarding control protocols like STP and LLDP */ 37/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
37#define BR_GROUPFWD_RESTRICTED 0x4007u 38#define BR_GROUPFWD_RESTRICTED 0x0007u
38/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ 39/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
39#define BR_GROUPFWD_8021AD 0xB801u 40#define BR_GROUPFWD_8021AD 0xB801u
40 41
@@ -214,7 +215,10 @@ struct net_bridge
214 spinlock_t hash_lock; 215 spinlock_t hash_lock;
215 struct hlist_head hash[BR_HASH_SIZE]; 216 struct hlist_head hash[BR_HASH_SIZE];
216#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 217#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
217 struct rtable fake_rtable; 218 union {
219 struct rtable fake_rtable;
220 struct rt6_info fake_rt6_info;
221 };
218 bool nf_call_iptables; 222 bool nf_call_iptables;
219 bool nf_call_ip6tables; 223 bool nf_call_ip6tables;
220 bool nf_call_arptables; 224 bool nf_call_arptables;
@@ -304,7 +308,6 @@ struct br_input_skb_cb {
304 int mrouters_only; 308 int mrouters_only;
305#endif 309#endif
306 310
307 u16 frag_max_size;
308 bool proxyarp_replied; 311 bool proxyarp_replied;
309 312
310#ifdef CONFIG_BRIDGE_VLAN_FILTERING 313#ifdef CONFIG_BRIDGE_VLAN_FILTERING
@@ -384,7 +387,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
384void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); 387void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
385void br_fdb_cleanup(unsigned long arg); 388void br_fdb_cleanup(unsigned long arg);
386void br_fdb_delete_by_port(struct net_bridge *br, 389void br_fdb_delete_by_port(struct net_bridge *br,
387 const struct net_bridge_port *p, int do_all); 390 const struct net_bridge_port *p, u16 vid, int do_all);
388struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, 391struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
389 const unsigned char *addr, __u16 vid); 392 const unsigned char *addr, __u16 vid);
390int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); 393int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
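
On the BR_GROUPFWD_RESTRICTED correction above: bit N of a bridge's group_fwd_mask governs the link-local address 01:80:C2:00:00:0N, so the old 0x4007 also pinned bit 14 (01-80-C2-00-00-0E, LLDP) while the intended always-restricted set is only STP (...00), MAC PAUSE (...01) and LACP (...02), i.e. 0x0007. A loose sketch of that mapping, not the kernel's actual check:

    #include <stdbool.h>
    #include <stdint.h>

    #define GROUPFWD_RESTRICTED_LIKE 0x0007u    /* bits 0, 1, 2 */

    /* dest is 01:80:C2:00:00:0N; bit N of the mask controls forwarding. */
    static bool may_forward(uint16_t group_fwd_mask, uint8_t last_octet)
    {
        uint16_t bit = 1u << (last_octet & 0xf);

        return (group_fwd_mask & bit) && !(bit & GROUPFWD_RESTRICTED_LIKE);
    }
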
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index fb3ebe615513..b4b6dab9c285 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -39,10 +39,14 @@ void br_log_state(const struct net_bridge_port *p)
39 39
40void br_set_state(struct net_bridge_port *p, unsigned int state) 40void br_set_state(struct net_bridge_port *p, unsigned int state)
41{ 41{
42 struct switchdev_attr attr = {
43 .id = SWITCHDEV_ATTR_PORT_STP_STATE,
44 .u.stp_state = state,
45 };
42 int err; 46 int err;
43 47
44 p->state = state; 48 p->state = state;
45 err = netdev_switch_port_stp_update(p->dev, state); 49 err = switchdev_port_attr_set(p->dev, &attr);
46 if (err && err != -EOPNOTSUPP) 50 if (err && err != -EOPNOTSUPP)
47 br_warn(p->br, "error setting offload STP state on port %u(%s)\n", 51 br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
48 (unsigned int) p->port_no, p->dev->name); 52 (unsigned int) p->port_no, p->dev->name);
@@ -424,7 +428,6 @@ static void br_make_forwarding(struct net_bridge_port *p)
424 else 428 else
425 br_set_state(p, BR_STATE_LEARNING); 429 br_set_state(p, BR_STATE_LEARNING);
426 430
427 br_multicast_enable_port(p);
428 br_log_state(p); 431 br_log_state(p);
429 br_ifinfo_notify(RTM_NEWLINK, p); 432 br_ifinfo_notify(RTM_NEWLINK, p);
430 433
@@ -458,6 +461,12 @@ void br_port_state_selection(struct net_bridge *br)
458 } 461 }
459 } 462 }
460 463
464 if (p->state != BR_STATE_BLOCKING)
465 br_multicast_enable_port(p);
466 /* Multicast is not disabled for the port when it goes in
467 * blocking state because the timers will expire and stop by
468 * themselves without sending more queries.
469 */
461 if (p->state == BR_STATE_FORWARDING) 470 if (p->state == BR_STATE_FORWARDING)
462 ++liveports; 471 ++liveports;
463 } 472 }
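
The br_set_state() hunk above is a clean instance of the switchdev attribute pattern this series migrates to: build an attribute struct, offer it to the port driver, and treat -EOPNOTSUPP ("no hardware offload here") as non-fatal. A stand-in sketch of the same shape:

    #include <errno.h>
    #include <stdio.h>

    enum attr_id_like { ATTR_PORT_STP_STATE };

    struct switchdev_attr_like {
        enum attr_id_like id;
        int stp_state;
    };

    static int port_attr_set(struct switchdev_attr_like *attr)
    {
        (void)attr;
        return -EOPNOTSUPP;          /* pretend: no offload driver bound */
    }

    static void set_state_like(int state)
    {
        struct switchdev_attr_like attr = {
            .id = ATTR_PORT_STP_STATE,
            .stp_state = state,
        };
        int err = port_attr_set(&attr);

        if (err && err != -EOPNOTSUPP)        /* only real errors matter */
            fprintf(stderr, "error setting offload STP state\n");
    }
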
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 41146872c1b4..a2730e7196cd 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
111 del_timer(&p->forward_delay_timer); 111 del_timer(&p->forward_delay_timer);
112 del_timer(&p->hold_timer); 112 del_timer(&p->hold_timer);
113 113
114 br_fdb_delete_by_port(br, p, 0); 114 br_fdb_delete_by_port(br, p, 0, 0);
115 br_multicast_disable_port(p); 115 br_multicast_disable_port(p);
116 116
117 br_configuration_update(br); 117 br_configuration_update(br);
@@ -243,12 +243,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
243 return true; 243 return true;
244} 244}
245 245
246/* called under bridge lock */ 246/* Acquires and releases bridge lock */
247void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) 247void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
248{ 248{
249 struct net_bridge_port *p; 249 struct net_bridge_port *p;
250 int wasroot; 250 int wasroot;
251 251
252 spin_lock_bh(&br->lock);
252 wasroot = br_is_root_bridge(br); 253 wasroot = br_is_root_bridge(br);
253 254
254 list_for_each_entry(p, &br->port_list, list) { 255 list_for_each_entry(p, &br->port_list, list) {
@@ -266,6 +267,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
266 br_port_state_selection(br); 267 br_port_state_selection(br);
267 if (br_is_root_bridge(br) && !wasroot) 268 if (br_is_root_bridge(br) && !wasroot)
268 br_become_root_bridge(br); 269 br_become_root_bridge(br);
270 spin_unlock_bh(&br->lock);
269} 271}
270 272
271/* called under bridge lock */ 273/* called under bridge lock */
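
The comment rewrite above ("called under bridge lock" becoming "Acquires and releases bridge lock") tracks a real ownership change: br_stp_set_bridge_priority() now takes br->lock itself instead of trusting every caller to hold it. In miniature, with pthreads standing in for spin_lock_bh():

    #include <pthread.h>

    static pthread_mutex_t br_lock = PTHREAD_MUTEX_INITIALIZER;
    static int bridge_priority;

    static void set_bridge_priority(int newprio)
    {
        pthread_mutex_lock(&br_lock);    /* was: a documented caller duty */
        bridge_priority = newprio;
        pthread_mutex_unlock(&br_lock);
    }
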
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 4905845a94e9..efe415ad842a 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -160,7 +160,7 @@ static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
160 160
161static int store_flush(struct net_bridge_port *p, unsigned long v) 161static int store_flush(struct net_bridge_port *p, unsigned long v)
162{ 162{
163 br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry 163 br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
164 return 0; 164 return 0;
165} 165}
166static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); 166static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 13013fe8db24..0d41f81838ff 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -2,6 +2,7 @@
2#include <linux/netdevice.h> 2#include <linux/netdevice.h>
3#include <linux/rtnetlink.h> 3#include <linux/rtnetlink.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <net/switchdev.h>
5 6
6#include "br_private.h" 7#include "br_private.h"
7 8
@@ -36,6 +37,36 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
36 clear_bit(vid, v->untagged_bitmap); 37 clear_bit(vid, v->untagged_bitmap);
37} 38}
38 39
40static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
41 u16 vid, u16 flags)
42{
43 const struct net_device_ops *ops = dev->netdev_ops;
44 int err;
45
46 /* If driver uses VLAN ndo ops, use 8021q to install vid
47 * on device, otherwise try switchdev ops to install vid.
48 */
49
50 if (ops->ndo_vlan_rx_add_vid) {
51 err = vlan_vid_add(dev, br->vlan_proto, vid);
52 } else {
53 struct switchdev_obj vlan_obj = {
54 .id = SWITCHDEV_OBJ_PORT_VLAN,
55 .u.vlan = {
56 .flags = flags,
57 .vid_begin = vid,
58 .vid_end = vid,
59 },
60 };
61
62 err = switchdev_port_obj_add(dev, &vlan_obj);
63 if (err == -EOPNOTSUPP)
64 err = 0;
65 }
66
67 return err;
68}
69
39static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) 70static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
40{ 71{
41 struct net_bridge_port *p = NULL; 72 struct net_bridge_port *p = NULL;
@@ -62,7 +93,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
62 * This ensures tagged traffic enters the bridge when 93 * This ensures tagged traffic enters the bridge when
63 * promiscuous mode is disabled by br_manage_promisc(). 94 * promiscuous mode is disabled by br_manage_promisc().
64 */ 95 */
65 err = vlan_vid_add(dev, br->vlan_proto, vid); 96 err = __vlan_vid_add(dev, br, vid, flags);
66 if (err) 97 if (err)
67 return err; 98 return err;
68 } 99 }
@@ -86,6 +117,30 @@ out_filt:
86 return err; 117 return err;
87} 118}
88 119
120static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
121 u16 vid)
122{
123 const struct net_device_ops *ops = dev->netdev_ops;
124
125 /* If driver uses VLAN ndo ops, use 8021q to delete vid
126 * on device, otherwise try switchdev ops to delete vid.
127 */
128
129 if (ops->ndo_vlan_rx_kill_vid) {
130 vlan_vid_del(dev, br->vlan_proto, vid);
131 } else {
132 struct switchdev_obj vlan_obj = {
133 .id = SWITCHDEV_OBJ_PORT_VLAN,
134 .u.vlan = {
135 .vid_begin = vid,
136 .vid_end = vid,
137 },
138 };
139
140 switchdev_port_obj_del(dev, &vlan_obj);
141 }
142}
143
89static int __vlan_del(struct net_port_vlans *v, u16 vid) 144static int __vlan_del(struct net_port_vlans *v, u16 vid)
90{ 145{
91 if (!test_bit(vid, v->vlan_bitmap)) 146 if (!test_bit(vid, v->vlan_bitmap))
@@ -96,7 +151,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
96 151
97 if (v->port_idx) { 152 if (v->port_idx) {
98 struct net_bridge_port *p = v->parent.port; 153 struct net_bridge_port *p = v->parent.port;
99 vlan_vid_del(p->dev, p->br->vlan_proto, vid); 154 __vlan_vid_del(p->dev, p->br, vid);
100 } 155 }
101 156
102 clear_bit(vid, v->vlan_bitmap); 157 clear_bit(vid, v->vlan_bitmap);
@@ -686,6 +741,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
686 return -EINVAL; 741 return -EINVAL;
687 742
688 br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); 743 br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
744 br_fdb_delete_by_port(port->br, port, vid, 0);
689 745
690 return __vlan_del(pv, vid); 746 return __vlan_del(pv, vid);
691} 747}
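
__vlan_vid_add()/__vlan_vid_del() above encode one dispatch rule: drivers that implement the 8021q ndo hooks get the classic vlan_vid_add()/vlan_vid_del() path, anything else is offered a switchdev VLAN object, and -EOPNOTSUPP from switchdev is treated as "nothing to offload" rather than failure. A stand-in sketch of the add side:

    #include <errno.h>
    #include <stddef.h>

    struct dev_ops_like {
        int (*ndo_vlan_rx_add_vid)(unsigned short vid);
    };

    static int switchdev_vlan_add(unsigned short vid)
    {
        (void)vid;
        return -EOPNOTSUPP;              /* pretend: no switchdev driver */
    }

    static int vlan_vid_add_like(const struct dev_ops_like *ops,
                                 unsigned short vid)
    {
        if (ops->ndo_vlan_rx_add_vid)       /* 8021q path preferred */
            return ops->ndo_vlan_rx_add_vid(vid);

        int err = switchdev_vlan_add(vid);  /* switchdev fallback */
        return err == -EOPNOTSUPP ? 0 : err;
    }
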
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 071d87214dde..0c40570069ba 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -164,8 +164,10 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
164 !(info->bitmask & EBT_STP_MASK)) 164 !(info->bitmask & EBT_STP_MASK))
165 return -EINVAL; 165 return -EINVAL;
166 /* Make sure the match only receives stp frames */ 166 /* Make sure the match only receives stp frames */
167 if (!ether_addr_equal(e->destmac, bridge_ula) || 167 if (!par->nft_compat &&
168 !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) 168 (!ether_addr_equal(e->destmac, bridge_ula) ||
169 !ether_addr_equal(e->destmsk, msk) ||
170 !(e->bitmask & EBT_DESTMAC)))
169 return -EINVAL; 171 return -EINVAL;
170 172
171 return 0; 173 return 0;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 91180a7fc943..18ca4b24c418 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -6,7 +6,7 @@
6 * 6 *
7 * ebtables.c,v 2.0, July, 2002 7 * ebtables.c,v 2.0, July, 2002
8 * 8 *
9 * This code is stongly inspired on the iptables code which is 9 * This code is strongly inspired by the iptables code which is
10 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 10 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
11 * 11 *
12 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
@@ -139,7 +139,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
139 ethproto = h->h_proto; 139 ethproto = h->h_proto;
140 140
141 if (e->bitmask & EBT_802_3) { 141 if (e->bitmask & EBT_802_3) {
142 if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) 142 if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO))
143 return 1; 143 return 1;
144 } else if (!(e->bitmask & EBT_NOPROTO) && 144 } else if (!(e->bitmask & EBT_NOPROTO) &&
145 FWINV2(e->ethproto != ethproto, EBT_IPROTO)) 145 FWINV2(e->ethproto != ethproto, EBT_IPROTO))
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 112ad784838a..3cc71b9f5517 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1055,7 +1055,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1055 * is really not used at all in the net/core or socket.c but the 1055 * is really not used at all in the net/core or socket.c but the
1056 * initialization makes sure that sock->state is not uninitialized. 1056 * initialization makes sure that sock->state is not uninitialized.
1057 */ 1057 */
1058 sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); 1058 sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern);
1059 if (!sk) 1059 if (!sk)
1060 return -ENOMEM; 1060 return -ENOMEM;
1061 1061
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 32d710eaf1fc..166d436196c1 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */
89struct s_stats can_stats; /* packet statistics */ 89struct s_stats can_stats; /* packet statistics */
90struct s_pstats can_pstats; /* receive list statistics */ 90struct s_pstats can_pstats; /* receive list statistics */
91 91
92static atomic_t skbcounter = ATOMIC_INIT(0);
93
92/* 94/*
93 * af_can socket functions 95 * af_can socket functions
94 */ 96 */
@@ -179,7 +181,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
179 181
180 sock->ops = cp->ops; 182 sock->ops = cp->ops;
181 183
182 sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); 184 sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern);
183 if (!sk) { 185 if (!sk) {
184 err = -ENOMEM; 186 err = -ENOMEM;
185 goto errout; 187 goto errout;
@@ -679,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
679 can_stats.rx_frames++; 681 can_stats.rx_frames++;
680 can_stats.rx_frames_delta++; 682 can_stats.rx_frames_delta++;
681 683
684 /* create non-zero unique skb identifier together with *skb */
685 while (!(can_skb_prv(skb)->skbcnt))
686 can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter);
687
682 rcu_read_lock(); 688 rcu_read_lock();
683 689
684 /* deliver the packet to sockets listening on all devices */ 690 /* deliver the packet to sockets listening on all devices */
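
The skbcounter added above hands out per-skb identifiers with atomic_inc_return(), and the while loop re-draws whenever the 32-bit counter wraps to 0, so 0 can keep meaning "not yet assigned" (which is why the senders in bcm.c and raw.c initialise skbcnt to 0). A userspace sketch of the same idiom with C11 atomics:

    #include <stdatomic.h>

    static atomic_int skbcounter;        /* zero-initialised, like the kernel's */

    static int next_skbcnt(void)
    {
        int id = 0;

        while (!id)                       /* skip 0 on wraparound */
            id = atomic_fetch_add(&skbcounter, 1) + 1;
        return id;
    }
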
diff --git a/net/can/bcm.c b/net/can/bcm.c
index b523453585be..a1ba6875c2a2 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op)
261 261
262 can_skb_reserve(skb); 262 can_skb_reserve(skb);
263 can_skb_prv(skb)->ifindex = dev->ifindex; 263 can_skb_prv(skb)->ifindex = dev->ifindex;
264 can_skb_prv(skb)->skbcnt = 0;
264 265
265 memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); 266 memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
266 267
@@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
1217 } 1218 }
1218 1219
1219 can_skb_prv(skb)->ifindex = dev->ifindex; 1220 can_skb_prv(skb)->ifindex = dev->ifindex;
1221 can_skb_prv(skb)->skbcnt = 0;
1220 skb->dev = dev; 1222 skb->dev = dev;
1221 can_skb_set_owner(skb, sk); 1223 can_skb_set_owner(skb, sk);
1222 err = can_send(skb, 1); /* send with loopback */ 1224 err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/gw.c b/net/can/gw.c
index a6f448e18ea8..455168718c2e 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -110,6 +110,7 @@ struct cf_mod {
110 void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); 110 void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
111 void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); 111 void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
112 } csumfunc; 112 } csumfunc;
113 u32 uid;
113}; 114};
114 115
115 116
@@ -548,6 +549,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
548 goto cancel; 549 goto cancel;
549 } 550 }
550 551
552 if (gwj->mod.uid) {
553 if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0)
554 goto cancel;
555 }
556
551 if (gwj->mod.csumfunc.crc8) { 557 if (gwj->mod.csumfunc.crc8) {
552 if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, 558 if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
553 &gwj->mod.csum.crc8) < 0) 559 &gwj->mod.csum.crc8) < 0)
@@ -619,6 +625,7 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
619 [CGW_DST_IF] = { .type = NLA_U32 }, 625 [CGW_DST_IF] = { .type = NLA_U32 },
620 [CGW_FILTER] = { .len = sizeof(struct can_filter) }, 626 [CGW_FILTER] = { .len = sizeof(struct can_filter) },
621 [CGW_LIM_HOPS] = { .type = NLA_U8 }, 627 [CGW_LIM_HOPS] = { .type = NLA_U8 },
628 [CGW_MOD_UID] = { .type = NLA_U32 },
622}; 629};
623 630
624/* check for common and gwtype specific attributes */ 631/* check for common and gwtype specific attributes */
@@ -761,6 +768,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
761 else 768 else
762 mod->csumfunc.xor = cgw_csum_xor_neg; 769 mod->csumfunc.xor = cgw_csum_xor_neg;
763 } 770 }
771
772 if (tb[CGW_MOD_UID]) {
773 nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32));
774 }
764 } 775 }
765 776
766 if (gwtype == CGW_TYPE_CAN_CAN) { 777 if (gwtype == CGW_TYPE_CAN_CAN) {
@@ -802,6 +813,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
802{ 813{
803 struct rtcanmsg *r; 814 struct rtcanmsg *r;
804 struct cgw_job *gwj; 815 struct cgw_job *gwj;
816 struct cf_mod mod;
817 struct can_can_gw ccgw;
805 u8 limhops = 0; 818 u8 limhops = 0;
806 int err = 0; 819 int err = 0;
807 820
@@ -819,6 +832,36 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
819 if (r->gwtype != CGW_TYPE_CAN_CAN) 832 if (r->gwtype != CGW_TYPE_CAN_CAN)
820 return -EINVAL; 833 return -EINVAL;
821 834
835 err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
836 if (err < 0)
837 return err;
838
839 if (mod.uid) {
840
841 ASSERT_RTNL();
842
843 /* check for updating an existing job with identical uid */
844 hlist_for_each_entry(gwj, &cgw_list, list) {
845
846 if (gwj->mod.uid != mod.uid)
847 continue;
848
849 /* interfaces & filters must be identical */
850 if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
851 return -EINVAL;
852
853 /* update modifications with disabled softirq & quit */
854 local_bh_disable();
855 memcpy(&gwj->mod, &mod, sizeof(mod));
856 local_bh_enable();
857 return 0;
858 }
859 }
860
861 /* ifindex == 0 is not allowed for job creation */
862 if (!ccgw.src_idx || !ccgw.dst_idx)
863 return -ENODEV;
864
822 gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); 865 gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
823 if (!gwj) 866 if (!gwj)
824 return -ENOMEM; 867 return -ENOMEM;
@@ -828,18 +871,14 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
828 gwj->deleted_frames = 0; 871 gwj->deleted_frames = 0;
829 gwj->flags = r->flags; 872 gwj->flags = r->flags;
830 gwj->gwtype = r->gwtype; 873 gwj->gwtype = r->gwtype;
874 gwj->limit_hops = limhops;
831 875
832 err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw, 876 /* insert already parsed information */
833 &limhops); 877 memcpy(&gwj->mod, &mod, sizeof(mod));
834 if (err < 0) 878 memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw));
835 goto out;
836 879
837 err = -ENODEV; 880 err = -ENODEV;
838 881
839 /* ifindex == 0 is not allowed for job creation */
840 if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx)
841 goto out;
842
843 gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx); 882 gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);
844 883
845 if (!gwj->src.dev) 884 if (!gwj->src.dev)
@@ -856,8 +895,6 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
856 if (gwj->dst.dev->type != ARPHRD_CAN) 895 if (gwj->dst.dev->type != ARPHRD_CAN)
857 goto out; 896 goto out;
858 897
859 gwj->limit_hops = limhops;
860
861 ASSERT_RTNL(); 898 ASSERT_RTNL();
862 899
863 err = cgw_register_filter(gwj); 900 err = cgw_register_filter(gwj);
@@ -931,8 +968,15 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
931 if (gwj->limit_hops != limhops) 968 if (gwj->limit_hops != limhops)
932 continue; 969 continue;
933 970
934 if (memcmp(&gwj->mod, &mod, sizeof(mod))) 971 /* we have a match when uid is enabled and identical */
935 continue; 972 if (gwj->mod.uid || mod.uid) {
973 if (gwj->mod.uid != mod.uid)
974 continue;
975 } else {
976 /* no uid => check for identical modifications */
977 if (memcmp(&gwj->mod, &mod, sizeof(mod)))
978 continue;
979 }
936 980
937 /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ 981 /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */
938 if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) 982 if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
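
The CGW_MOD_UID handling above turns a non-zero uid into an "update the existing job in place" request: if a job with the same uid exists, its interfaces and filters must match exactly, and only its modifications are swapped (under local_bh_disable() in the kernel); otherwise a fresh job is created. A reduced sketch of that decision, with plain lists standing in for the RTNL-protected hlist:

    #include <stddef.h>

    struct mod_like { unsigned int uid; int flags; };
    struct job_like {
        struct mod_like mod;
        int src_idx, dst_idx;            /* the "routing part" */
        struct job_like *next;
    };

    /* Returns 0 if an existing job was updated, 1 if the caller should
     * create a new one, -1 on a uid match with different routing. */
    static int create_or_update(struct job_like *jobs,
                                const struct job_like *req)
    {
        struct job_like *j;

        for (j = req->mod.uid ? jobs : NULL; j; j = j->next) {
            if (j->mod.uid != req->mod.uid)
                continue;
            if (j->src_idx != req->src_idx || j->dst_idx != req->dst_idx)
                return -1;               /* -EINVAL in the kernel */
            j->mod = req->mod;           /* update modifications in place */
            return 0;
        }
        return 1;                        /* fall through to job creation */
    }
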
diff --git a/net/can/raw.c b/net/can/raw.c
index 31b9748cbb4e..2e67b1423cd3 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1");
75 */ 75 */
76 76
77struct uniqframe { 77struct uniqframe {
78 ktime_t tstamp; 78 int skbcnt;
79 const struct sk_buff *skb; 79 const struct sk_buff *skb;
80 unsigned int join_rx_count; 80 unsigned int join_rx_count;
81}; 81};
@@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
133 133
134 /* eliminate multiple filter matches for the same skb */ 134 /* eliminate multiple filter matches for the same skb */
135 if (this_cpu_ptr(ro->uniq)->skb == oskb && 135 if (this_cpu_ptr(ro->uniq)->skb == oskb &&
136 ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { 136 this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
137 if (ro->join_filters) { 137 if (ro->join_filters) {
138 this_cpu_inc(ro->uniq->join_rx_count); 138 this_cpu_inc(ro->uniq->join_rx_count);
139 /* drop frame until all enabled filters matched */ 139 /* drop frame until all enabled filters matched */
@@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
144 } 144 }
145 } else { 145 } else {
146 this_cpu_ptr(ro->uniq)->skb = oskb; 146 this_cpu_ptr(ro->uniq)->skb = oskb;
147 this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; 147 this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt;
148 this_cpu_ptr(ro->uniq)->join_rx_count = 1; 148 this_cpu_ptr(ro->uniq)->join_rx_count = 1;
149 /* drop first frame to check all enabled filters? */ 149 /* drop first frame to check all enabled filters? */
150 if (ro->join_filters && ro->count > 1) 150 if (ro->join_filters && ro->count > 1)
@@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
749 749
750 can_skb_reserve(skb); 750 can_skb_reserve(skb);
751 can_skb_prv(skb)->ifindex = dev->ifindex; 751 can_skb_prv(skb)->ifindex = dev->ifindex;
752 can_skb_prv(skb)->skbcnt = 0;
752 753
753 err = memcpy_from_msg(skb_put(skb, size), msg, size); 754 err = memcpy_from_msg(skb_put(skb, size), msg, size);
754 if (err < 0) 755 if (err < 0)
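
raw_rcv() above stops comparing timestamps and instead treats a frame as already seen only when both the skb pointer and its skbcnt match the per-CPU uniqframe record, which stays correct even if an skb address is recycled between frames. A sketch of that test, with stand-in types and no per-CPU machinery:

    #include <stdbool.h>

    struct uniq_like {
        const void *skb;
        int skbcnt;
    };

    static bool seen_before(struct uniq_like *u, const void *skb, int skbcnt)
    {
        if (u->skb == skb && u->skbcnt == skbcnt)
            return true;          /* same frame matched another filter */
        u->skb = skb;             /* remember the newest frame */
        u->skbcnt = skbcnt;
        return false;
    }
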
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 79e8f71aef5b..f30329f72641 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -9,6 +9,7 @@
9#include <keys/ceph-type.h> 9#include <keys/ceph-type.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h> 11#include <linux/mount.h>
12#include <linux/nsproxy.h>
12#include <linux/parser.h> 13#include <linux/parser.h>
13#include <linux/sched.h> 14#include <linux/sched.h>
14#include <linux/seq_file.h> 15#include <linux/seq_file.h>
@@ -16,8 +17,6 @@
16#include <linux/statfs.h> 17#include <linux/statfs.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
19#include <linux/nsproxy.h>
20#include <net/net_namespace.h>
21 20
22 21
23#include <linux/ceph/ceph_features.h> 22#include <linux/ceph/ceph_features.h>
@@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt,
131 int i; 130 int i;
132 int ret; 131 int ret;
133 132
133 /*
134 * Don't bother comparing options if network namespaces don't
135 * match.
136 */
137 if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net)))
138 return -1;
139
134 ret = memcmp(opt1, opt2, ofs); 140 ret = memcmp(opt1, opt2, ofs);
135 if (ret) 141 if (ret)
136 return ret; 142 return ret;
@@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name,
335 int err = -ENOMEM; 341 int err = -ENOMEM;
336 substring_t argstr[MAX_OPT_ARGS]; 342 substring_t argstr[MAX_OPT_ARGS];
337 343
338 if (current->nsproxy->net_ns != &init_net)
339 return ERR_PTR(-EINVAL);
340
341 opt = kzalloc(sizeof(*opt), GFP_KERNEL); 344 opt = kzalloc(sizeof(*opt), GFP_KERNEL);
342 if (!opt) 345 if (!opt)
343 return ERR_PTR(-ENOMEM); 346 return ERR_PTR(-ENOMEM);
@@ -352,8 +355,8 @@ ceph_parse_options(char *options, const char *dev_name,
352 /* start with defaults */ 355 /* start with defaults */
353 opt->flags = CEPH_OPT_DEFAULT; 356 opt->flags = CEPH_OPT_DEFAULT;
354 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
355 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ 358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
356 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ 359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
357 360
358 /* get mon ip(s) */ 361 /* get mon ip(s) */
359 /* ip1[:port1][,ip2[:port2]...] */ 362 /* ip1[:port1][,ip2[:port2]...] */
@@ -439,13 +442,32 @@ ceph_parse_options(char *options, const char *dev_name,
439 pr_warn("ignoring deprecated osdtimeout option\n"); 442 pr_warn("ignoring deprecated osdtimeout option\n");
440 break; 443 break;
441 case Opt_osdkeepalivetimeout: 444 case Opt_osdkeepalivetimeout:
442 opt->osd_keepalive_timeout = intval; 445 /* 0 isn't well defined right now, reject it */
446 if (intval < 1 || intval > INT_MAX / 1000) {
447 pr_err("osdkeepalive out of range\n");
448 err = -EINVAL;
449 goto out;
450 }
451 opt->osd_keepalive_timeout =
452 msecs_to_jiffies(intval * 1000);
443 break; 453 break;
444 case Opt_osd_idle_ttl: 454 case Opt_osd_idle_ttl:
445 opt->osd_idle_ttl = intval; 455 /* 0 isn't well defined right now, reject it */
456 if (intval < 1 || intval > INT_MAX / 1000) {
457 pr_err("osd_idle_ttl out of range\n");
458 err = -EINVAL;
459 goto out;
460 }
461 opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
446 break; 462 break;
447 case Opt_mount_timeout: 463 case Opt_mount_timeout:
448 opt->mount_timeout = intval; 464 /* 0 is "wait forever" (i.e. infinite timeout) */
465 if (intval < 0 || intval > INT_MAX / 1000) {
466 pr_err("mount_timeout out of range\n");
467 err = -EINVAL;
468 goto out;
469 }
470 opt->mount_timeout = msecs_to_jiffies(intval * 1000);
449 break; 471 break;
450 472
451 case Opt_share: 473 case Opt_share:
@@ -512,12 +534,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
512 seq_puts(m, "notcp_nodelay,"); 534 seq_puts(m, "notcp_nodelay,");
513 535
514 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 536 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
515 seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); 537 seq_printf(m, "mount_timeout=%d,",
538 jiffies_to_msecs(opt->mount_timeout) / 1000);
516 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) 539 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
517 seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); 540 seq_printf(m, "osd_idle_ttl=%d,",
541 jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
518 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 542 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
519 seq_printf(m, "osdkeepalivetimeout=%d,", 543 seq_printf(m, "osdkeepalivetimeout=%d,",
520 opt->osd_keepalive_timeout); 544 jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
521 545
522 /* drop redundant comma */ 546 /* drop redundant comma */
523 if (m->count != pos) 547 if (m->count != pos)
@@ -587,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
587fail_monc: 611fail_monc:
588 ceph_monc_stop(&client->monc); 612 ceph_monc_stop(&client->monc);
589fail: 613fail:
614 ceph_messenger_fini(&client->msgr);
590 kfree(client); 615 kfree(client);
591 return ERR_PTR(err); 616 return ERR_PTR(err);
592} 617}
@@ -600,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client)
600 625
601 /* unmount */ 626 /* unmount */
602 ceph_osdc_stop(&client->osdc); 627 ceph_osdc_stop(&client->osdc);
603
604 ceph_monc_stop(&client->monc); 628 ceph_monc_stop(&client->monc);
629 ceph_messenger_fini(&client->msgr);
605 630
606 ceph_debugfs_client_cleanup(client); 631 ceph_debugfs_client_cleanup(client);
607 632
@@ -626,8 +651,8 @@ static int have_mon_and_osd_map(struct ceph_client *client)
626 */ 651 */
627int __ceph_open_session(struct ceph_client *client, unsigned long started) 652int __ceph_open_session(struct ceph_client *client, unsigned long started)
628{ 653{
629 int err; 654 unsigned long timeout = client->options->mount_timeout;
630 unsigned long timeout = client->options->mount_timeout * HZ; 655 long err;
631 656
632 /* open session, and wait for mon and osd maps */ 657 /* open session, and wait for mon and osd maps */
633 err = ceph_monc_open_session(&client->monc); 658 err = ceph_monc_open_session(&client->monc);
@@ -635,16 +660,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
635 return err; 660 return err;
636 661
637 while (!have_mon_and_osd_map(client)) { 662 while (!have_mon_and_osd_map(client)) {
638 err = -EIO;
639 if (timeout && time_after_eq(jiffies, started + timeout)) 663 if (timeout && time_after_eq(jiffies, started + timeout))
640 return err; 664 return -ETIMEDOUT;
641 665
642 /* wait */ 666 /* wait */
643 dout("mount waiting for mon_map\n"); 667 dout("mount waiting for mon_map\n");
644 err = wait_event_interruptible_timeout(client->auth_wq, 668 err = wait_event_interruptible_timeout(client->auth_wq,
645 have_mon_and_osd_map(client) || (client->auth_err < 0), 669 have_mon_and_osd_map(client) || (client->auth_err < 0),
646 timeout); 670 ceph_timeout_jiffies(timeout));
647 if (err == -EINTR || err == -ERESTARTSYS) 671 if (err < 0)
648 return err; 672 return err;
649 if (client->auth_err < 0) 673 if (client->auth_err < 0)
650 return client->auth_err; 674 return client->auth_err;
@@ -721,5 +745,5 @@ module_exit(exit_ceph_lib);
721MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 745MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
722MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 746MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
723MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 747MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
724MODULE_DESCRIPTION("Ceph filesystem for Linux"); 748MODULE_DESCRIPTION("Ceph core library");
725MODULE_LICENSE("GPL"); 749MODULE_LICENSE("GPL");
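
The three timeout options above follow one pattern: range-check the user's seconds value against INT_MAX / 1000 so the milliseconds multiplication cannot overflow, then store jiffies via msecs_to_jiffies(). A minimal sketch of the guard, leaving the jiffies conversion itself to the kernel helper:

    #include <errno.h>
    #include <limits.h>

    static int secs_to_msecs_checked(int secs, unsigned long *msecs)
    {
        if (secs < 1 || secs > INT_MAX / 1000)
            return -EINVAL;               /* secs * 1000 would overflow */
        *msecs = (unsigned long)secs * 1000;  /* then msecs_to_jiffies() */
        return 0;
    }
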
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 9d84ce4ea0df..80d7c3a97cb8 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,15 +1,11 @@
1
2#ifdef __KERNEL__ 1#ifdef __KERNEL__
3# include <linux/slab.h> 2# include <linux/slab.h>
3# include <linux/crush/crush.h>
4#else 4#else
5# include <stdlib.h> 5# include "crush_compat.h"
6# include <assert.h> 6# include "crush.h"
7# define kfree(x) do { if (x) free(x); } while (0)
8# define BUG_ON(x) assert(!(x))
9#endif 7#endif
10 8
11#include <linux/crush/crush.h>
12
13const char *crush_bucket_alg_name(int alg) 9const char *crush_bucket_alg_name(int alg)
14{ 10{
15 switch (alg) { 11 switch (alg) {
@@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map)
134 kfree(map->rules); 130 kfree(map->rules);
135 } 131 }
136 132
133#ifndef __KERNEL__
134 kfree(map->choose_tries);
135#endif
137 kfree(map); 136 kfree(map);
138} 137}
139 138
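
The include churn in crush.c (and in hash.c and mapper.c below) moves the CRUSH sources to a clean dual-build setup: under __KERNEL__ they pull linux/ headers, otherwise a crush_compat.h supplies userspace stand-ins for kfree(), BUG_ON() and friends. The idiom, sketched inline rather than via the real compat header:

    #ifdef __KERNEL__
    # include <linux/slab.h>
    #else
    # include <stdlib.h>
    # include <assert.h>
    # define kfree(x)  free(x)         /* free(NULL) is legal in C */
    # define BUG_ON(x) assert(!(x))
    #endif

    static void destroy_like(void *p)
    {
        BUG_ON(!p);
        kfree(p);     /* identical call compiles in both environments */
    }
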
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
index 6192c7fc958c..aae534c901a4 100644
--- a/net/ceph/crush/crush_ln_table.h
+++ b/net/ceph/crush/crush_ln_table.h
@@ -10,20 +10,20 @@
10 * 10 *
11 */ 11 */
12 12
13#if defined(__linux__)
14#include <linux/types.h>
15#elif defined(__FreeBSD__)
16#include <sys/types.h>
17#endif
18
19#ifndef CEPH_CRUSH_LN_H 13#ifndef CEPH_CRUSH_LN_H
20#define CEPH_CRUSH_LN_H 14#define CEPH_CRUSH_LN_H
21 15
16#ifdef __KERNEL__
17# include <linux/types.h>
18#else
19# include "crush_compat.h"
20#endif
22 21
23// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) 22/*
24// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) 23 * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
25 24 * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
26static int64_t __RH_LH_tbl[128*2+2] = { 25 */
26static __s64 __RH_LH_tbl[128*2+2] = {
27 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll, 27 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
28 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all, 28 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
29 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll, 29 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
@@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = {
89 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll, 89 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
90 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll, 90 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
91 0x0000800000000000ll, 0x0000ffff00000000ll, 91 0x0000800000000000ll, 0x0000ffff00000000ll,
92 }; 92};
93
94 93
95 // LL_tbl[k] = 2^48*log2(1.0+k/2^15); 94/*
96static int64_t __LL_tbl[256] = { 95 * LL_tbl[k] = 2^48*log2(1.0+k/2^15)
96 */
97static __s64 __LL_tbl[256] = {
97 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull, 98 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
98 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull, 99 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
99 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull, 100 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
@@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = {
160 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull, 161 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
161}; 162};
162 163
163
164
165
166#endif 164#endif
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index 5bb63e37a8a1..ed123af49eba 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,6 +1,8 @@
1 1#ifdef __KERNEL__
2#include <linux/types.h> 2# include <linux/crush/hash.h>
3#include <linux/crush/hash.h> 3#else
4# include "hash.h"
5#endif
4 6
5/* 7/*
6 * Robert Jenkins' function for mixing 32-bit values 8 * Robert Jenkins' function for mixing 32-bit values
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 5b47736d27d9..393bfb22d5bb 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -1,27 +1,31 @@
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2015 Intel Corporation All Rights Reserved
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
1 12
2#ifdef __KERNEL__ 13#ifdef __KERNEL__
3# include <linux/string.h> 14# include <linux/string.h>
4# include <linux/slab.h> 15# include <linux/slab.h>
5# include <linux/bug.h> 16# include <linux/bug.h>
6# include <linux/kernel.h> 17# include <linux/kernel.h>
7# ifndef dprintk 18# include <linux/crush/crush.h>
8# define dprintk(args...) 19# include <linux/crush/hash.h>
9# endif
10#else 20#else
11# include <string.h> 21# include "crush_compat.h"
12# include <stdio.h> 22# include "crush.h"
13# include <stdlib.h> 23# include "hash.h"
14# include <assert.h>
15# define BUG_ON(x) assert(!(x))
16# define dprintk(args...) /* printf(args) */
17# define kmalloc(x, f) malloc(x)
18# define kfree(x) free(x)
19#endif 24#endif
20
21#include <linux/crush/crush.h>
22#include <linux/crush/hash.h>
23#include "crush_ln_table.h" 25#include "crush_ln_table.h"
24 26
27#define dprintk(args...) /* printf(args) */
28
25/* 29/*
26 * Implement the core CRUSH mapping algorithm. 30 * Implement the core CRUSH mapping algorithm.
27 */ 31 */
@@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
139 int i; 143 int i;
140 144
141 for (i = bucket->h.size-1; i >= 0; i--) { 145 for (i = bucket->h.size-1; i >= 0; i--) {
142 __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i], 146 __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i],
143 r, bucket->h.id); 147 r, bucket->h.id);
144 w &= 0xffff; 148 w &= 0xffff;
145 dprintk("list_choose i=%d x=%d r=%d item %d weight %x " 149 dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
@@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
238 return bucket->h.items[high]; 242 return bucket->h.items[high];
239} 243}
240 244
241// compute 2^44*log2(input+1) 245/* compute 2^44*log2(input+1) */
242uint64_t crush_ln(unsigned xin) 246static __u64 crush_ln(unsigned int xin)
243{ 247{
244 unsigned x=xin, x1; 248 unsigned int x = xin, x1;
245 int iexpon, index1, index2; 249 int iexpon, index1, index2;
246 uint64_t RH, LH, LL, xl64, result; 250 __u64 RH, LH, LL, xl64, result;
247 251
248 x++; 252 x++;
249 253
250 // normalize input 254 /* normalize input */
251 iexpon = 15; 255 iexpon = 15;
252 while(!(x&0x18000)) { x<<=1; iexpon--; } 256 while (!(x & 0x18000)) {
257 x <<= 1;
258 iexpon--;
259 }
253 260
254 index1 = (x>>8)<<1; 261 index1 = (x >> 8) << 1;
255 // RH ~ 2^56/index1 262 /* RH ~ 2^56/index1 */
256 RH = __RH_LH_tbl[index1 - 256]; 263 RH = __RH_LH_tbl[index1 - 256];
257 // LH ~ 2^48 * log2(index1/256) 264 /* LH ~ 2^48 * log2(index1/256) */
258 LH = __RH_LH_tbl[index1 + 1 - 256]; 265 LH = __RH_LH_tbl[index1 + 1 - 256];
259 266
260 // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 267 /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
261 xl64 = (int64_t)x * RH; 268 xl64 = (__s64)x * RH;
262 xl64 >>= 48; 269 xl64 >>= 48;
263 x1 = xl64; 270 x1 = xl64;
264 271
265 result = iexpon; 272 result = iexpon;
266 result <<= (12 + 32); 273 result <<= (12 + 32);
267 274
268 index2 = x1 & 0xff; 275 index2 = x1 & 0xff;
269 // LL ~ 2^48*log2(1.0+index2/2^15) 276 /* LL ~ 2^48*log2(1.0+index2/2^15) */
270 LL = __LL_tbl[index2]; 277 LL = __LL_tbl[index2];
271 278
272 LH = LH + LL; 279 LH = LH + LL;
273 280
274 LH >>= (48-12 - 32); 281 LH >>= (48 - 12 - 32);
275 result += LH; 282 result += LH;
276 283
277 return result; 284 return result;
278} 285}
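
Reading the cleaned-up crush_ln() above together with the table comments in crush_ln_table.h, the fixed-point identity the code evaluates appears to be the following (a sketch of the math under those table definitions, with e the normalization exponent, i_1 = (x' >> 8) << 1 the high bits of the normalized input x' in [2^15, 2^17), and i_2 the 8-bit residual obtained via RH ~ 2^56/i_1):

    \mathrm{crush\_ln}(x) \approx 2^{44}\log_2(x+1)
      = 2^{44}\left(e + \log_2\frac{i_1}{256}
            + \log_2\!\left(1 + \frac{i_2}{2^{15}}\right)\right)

The tables supply LH = 2^{48}\log_2(i_1/256) and LL = 2^{48}\log_2(1 + i_2/2^{15}); the closing shift LH >>= (48 - 12 - 32) rescales the 2^48 table precision down to the 2^44 of the result.
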
279 286
280 287
@@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin)
290static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, 297static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
291 int x, int r) 298 int x, int r)
292{ 299{
293 unsigned i, high = 0; 300 unsigned int i, high = 0;
294 unsigned u; 301 unsigned int u;
295 unsigned w; 302 unsigned int w;
296 __s64 ln, draw, high_draw = 0; 303 __s64 ln, draw, high_draw = 0;
297 304
298 for (i = 0; i < bucket->h.size; i++) { 305 for (i = 0; i < bucket->h.size; i++) {
@@ -567,6 +574,10 @@ reject:
567 out[outpos] = item; 574 out[outpos] = item;
568 outpos++; 575 outpos++;
569 count--; 576 count--;
577#ifndef __KERNEL__
578 if (map->choose_tries && ftotal <= map->choose_total_tries)
579 map->choose_tries[ftotal]++;
580#endif
570 } 581 }
571 582
572 dprintk("CHOOSE returns %d\n", outpos); 583 dprintk("CHOOSE returns %d\n", outpos);
@@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map,
610 } 621 }
611 622
612 for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) { 623 for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
624#ifdef DEBUG_INDEP
625 if (out2 && ftotal) {
626 dprintk("%u %d a: ", ftotal, left);
627 for (rep = outpos; rep < endpos; rep++) {
628 dprintk(" %d", out[rep]);
629 }
630 dprintk("\n");
631 dprintk("%u %d b: ", ftotal, left);
632 for (rep = outpos; rep < endpos; rep++) {
633 dprintk(" %d", out2[rep]);
634 }
635 dprintk("\n");
636 }
637#endif
613 for (rep = outpos; rep < endpos; rep++) { 638 for (rep = outpos; rep < endpos; rep++) {
614 if (out[rep] != CRUSH_ITEM_UNDEF) 639 if (out[rep] != CRUSH_ITEM_UNDEF)
615 continue; 640 continue;
@@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map,
726 out2[rep] = CRUSH_ITEM_NONE; 751 out2[rep] = CRUSH_ITEM_NONE;
727 } 752 }
728 } 753 }
754#ifndef __KERNEL__
755 if (map->choose_tries && ftotal <= map->choose_total_tries)
756 map->choose_tries[ftotal]++;
757#endif
758#ifdef DEBUG_INDEP
759 if (out2) {
760 dprintk("%u %d a: ", ftotal, left);
761 for (rep = outpos; rep < endpos; rep++) {
762 dprintk(" %d", out[rep]);
763 }
764 dprintk("\n");
765 dprintk("%u %d b: ", ftotal, left);
766 for (rep = outpos; rep < endpos; rep++) {
767 dprintk(" %d", out2[rep]);
768 }
769 dprintk("\n");
770 }
771#endif
729} 772}
730 773
731/** 774/**
@@ -790,8 +833,15 @@ int crush_do_rule(const struct crush_map *map,
790 833
791 switch (curstep->op) { 834 switch (curstep->op) {
792 case CRUSH_RULE_TAKE: 835 case CRUSH_RULE_TAKE:
793 w[0] = curstep->arg1; 836 if ((curstep->arg1 >= 0 &&
794 wsize = 1; 837 curstep->arg1 < map->max_devices) ||
838 (-1-curstep->arg1 < map->max_buckets &&
839 map->buckets[-1-curstep->arg1])) {
840 w[0] = curstep->arg1;
841 wsize = 1;
842 } else {
843 dprintk(" bad take value %d\n", curstep->arg1);
844 }
795 break; 845 break;
796 846
797 case CRUSH_RULE_SET_CHOOSE_TRIES: 847 case CRUSH_RULE_SET_CHOOSE_TRIES:
@@ -877,7 +927,7 @@ int crush_do_rule(const struct crush_map *map,
877 0); 927 0);
878 } else { 928 } else {
879 out_size = ((numrep < (result_max-osize)) ? 929 out_size = ((numrep < (result_max-osize)) ?
880 numrep : (result_max-osize)); 930 numrep : (result_max-osize));
881 crush_choose_indep( 931 crush_choose_indep(
882 map, 932 map,
883 map->buckets[-1-w[i]], 933 map->buckets[-1-w[i]],
@@ -923,5 +973,3 @@ int crush_do_rule(const struct crush_map *map,
923 } 973 }
924 return result_len; 974 return result_len;
925} 975}
926
927
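
The bounds check added to CRUSH_RULE_TAKE above leans on CRUSH's item-id convention: nonnegative ids name devices and must lie below max_devices, while negative ids name buckets, with bucket id b stored at buckets[-1-b]. A tiny sketch of that validation with stand-in types:

    #include <stdbool.h>

    static bool take_arg_is_valid(int arg, int max_devices,
                                  int max_buckets, void **buckets)
    {
        if (arg >= 0)
            return arg < max_devices;                 /* a device id */
        return -1 - arg < max_buckets && buckets[-1 - arg]; /* a bucket */
    }
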
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 967080a9f043..e3be1d22a247 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -6,6 +6,7 @@
6#include <linux/inet.h> 6#include <linux/inet.h>
7#include <linux/kthread.h> 7#include <linux/kthread.h>
8#include <linux/net.h> 8#include <linux/net.h>
9#include <linux/nsproxy.h>
9#include <linux/slab.h> 10#include <linux/slab.h>
10#include <linux/socket.h> 11#include <linux/socket.h>
11#include <linux/string.h> 12#include <linux/string.h>
@@ -278,7 +279,6 @@ static void _ceph_msgr_exit(void)
278 ceph_msgr_slab_exit(); 279 ceph_msgr_slab_exit();
279 280
280 BUG_ON(zero_page == NULL); 281 BUG_ON(zero_page == NULL);
281 kunmap(zero_page);
282 page_cache_release(zero_page); 282 page_cache_release(zero_page);
283 zero_page = NULL; 283 zero_page = NULL;
284} 284}
@@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
480 int ret; 480 int ret;
481 481
482 BUG_ON(con->sock); 482 BUG_ON(con->sock);
483 ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, 483 ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
484 IPPROTO_TCP, &sock); 484 SOCK_STREAM, IPPROTO_TCP, &sock);
485 if (ret) 485 if (ret)
486 return ret; 486 return ret;
487 sock->sk->sk_allocation = GFP_NOFS; 487 sock->sk->sk_allocation = GFP_NOFS;
@@ -1545,7 +1545,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1545 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, 1545 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
1546 &last_piece); 1546 &last_piece);
1547 ret = ceph_tcp_sendpage(con->sock, page, page_offset, 1547 ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1548 length, last_piece); 1548 length, !last_piece);
1549 if (ret <= 0) { 1549 if (ret <= 0) {
1550 if (do_datacrc) 1550 if (do_datacrc)
1551 msg->footer.data_crc = cpu_to_le32(crc); 1551 msg->footer.data_crc = cpu_to_le32(crc);
@@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con)
1732 1732
1733static bool addr_is_blank(struct sockaddr_storage *ss) 1733static bool addr_is_blank(struct sockaddr_storage *ss)
1734{ 1734{
1735 struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
1736 struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
1737
1735 switch (ss->ss_family) { 1738 switch (ss->ss_family) {
1736 case AF_INET: 1739 case AF_INET:
1737 return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; 1740 return addr->s_addr == htonl(INADDR_ANY);
1738 case AF_INET6: 1741 case AF_INET6:
1739 return 1742 return ipv6_addr_any(addr6);
1740 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && 1743 default:
1741 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && 1744 return true;
1742 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
1743 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
1744 } 1745 }
1745 return false;
1746} 1746}
1747 1747
1748static int addr_port(struct sockaddr_storage *ss) 1748static int addr_port(struct sockaddr_storage *ss)
@@ -2945,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
2945 msgr->tcp_nodelay = tcp_nodelay; 2945 msgr->tcp_nodelay = tcp_nodelay;
2946 2946
2947 atomic_set(&msgr->stopping, 0); 2947 atomic_set(&msgr->stopping, 0);
2948 write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
2948 2949
2949 dout("%s %p\n", __func__, msgr); 2950 dout("%s %p\n", __func__, msgr);
2950} 2951}
2951EXPORT_SYMBOL(ceph_messenger_init); 2952EXPORT_SYMBOL(ceph_messenger_init);
2952 2953
2954void ceph_messenger_fini(struct ceph_messenger *msgr)
2955{
2956 put_net(read_pnet(&msgr->net));
2957}
2958EXPORT_SYMBOL(ceph_messenger_fini);
2959
2953static void clear_standby(struct ceph_connection *con) 2960static void clear_standby(struct ceph_connection *con)
2954{ 2961{
2955 /* come back from STANDBY? */ 2962 /* come back from STANDBY? */
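
Taken together, the messenger hunks pin the opening process's network namespace for the messenger's lifetime: the namespace is captured in ceph_messenger_init(), the sock_create_kern() call (no longer implicitly current-process-relative) creates its socket inside it, and the new ceph_messenger_fini() drops the reference so the namespace can go away. Distilled to the bare pattern (a sketch, not compilable on its own):

/* init: take a reference on the caller's netns and remember it */
write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));

/* connect: create the kernel socket inside that pinned namespace */
ret = sock_create_kern(read_pnet(&msgr->net), paddr->ss_family,
                       SOCK_STREAM, IPPROTO_TCP, &sock);

/* fini: drop the reference, or the namespace leaks forever */
put_net(read_pnet(&msgr->net));
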
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2b3cf05e87b0..9d6ff1215928 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -298,21 +298,28 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
298} 298}
299EXPORT_SYMBOL(ceph_monc_request_next_osdmap); 299EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
300 300
301/*
302 * Wait for an osdmap with a given epoch.
303 *
304 * @epoch: epoch to wait for
305 * @timeout: in jiffies, 0 means "wait forever"
306 */
301int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, 307int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
302 unsigned long timeout) 308 unsigned long timeout)
303{ 309{
304 unsigned long started = jiffies; 310 unsigned long started = jiffies;
305 int ret; 311 long ret;
306 312
307 mutex_lock(&monc->mutex); 313 mutex_lock(&monc->mutex);
308 while (monc->have_osdmap < epoch) { 314 while (monc->have_osdmap < epoch) {
309 mutex_unlock(&monc->mutex); 315 mutex_unlock(&monc->mutex);
310 316
311 if (timeout != 0 && time_after_eq(jiffies, started + timeout)) 317 if (timeout && time_after_eq(jiffies, started + timeout))
312 return -ETIMEDOUT; 318 return -ETIMEDOUT;
313 319
314 ret = wait_event_interruptible_timeout(monc->client->auth_wq, 320 ret = wait_event_interruptible_timeout(monc->client->auth_wq,
315 monc->have_osdmap >= epoch, timeout); 321 monc->have_osdmap >= epoch,
322 ceph_timeout_jiffies(timeout));
316 if (ret < 0) 323 if (ret < 0)
317 return ret; 324 return ret;
318 325
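
The rewritten wait keeps the documented "0 means wait forever" contract by funneling the timeout through ceph_timeout_jiffies(): wait_event_interruptible_timeout() treats 0 as "return at once", so a zero timeout has to become an unbounded sleep, and ret widens from int to long to match that macro's return type. Presumably the helper amounts to:

static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
{
        return timeout ?: MAX_SCHEDULE_TIMEOUT;  /* 0 -> sleep indefinitely */
}
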
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index c4ec9239249a..50033677c0fa 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
296 case CEPH_OSD_OP_CMPXATTR: 296 case CEPH_OSD_OP_CMPXATTR:
297 ceph_osd_data_release(&op->xattr.osd_data); 297 ceph_osd_data_release(&op->xattr.osd_data);
298 break; 298 break;
299 case CEPH_OSD_OP_STAT:
300 ceph_osd_data_release(&op->raw_data_in);
301 break;
299 default: 302 default:
300 break; 303 break;
301 } 304 }
@@ -450,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
450 */ 453 */
451static struct ceph_osd_req_op * 454static struct ceph_osd_req_op *
452_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, 455_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
453 u16 opcode) 456 u16 opcode, u32 flags)
454{ 457{
455 struct ceph_osd_req_op *op; 458 struct ceph_osd_req_op *op;
456 459
@@ -460,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
460 op = &osd_req->r_ops[which]; 463 op = &osd_req->r_ops[which];
461 memset(op, 0, sizeof (*op)); 464 memset(op, 0, sizeof (*op));
462 op->op = opcode; 465 op->op = opcode;
466 op->flags = flags;
463 467
464 return op; 468 return op;
465} 469}
466 470
467void osd_req_op_init(struct ceph_osd_request *osd_req, 471void osd_req_op_init(struct ceph_osd_request *osd_req,
468 unsigned int which, u16 opcode) 472 unsigned int which, u16 opcode, u32 flags)
469{ 473{
470 (void)_osd_req_op_init(osd_req, which, opcode); 474 (void)_osd_req_op_init(osd_req, which, opcode, flags);
471} 475}
472EXPORT_SYMBOL(osd_req_op_init); 476EXPORT_SYMBOL(osd_req_op_init);
473 477
@@ -476,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
476 u64 offset, u64 length, 480 u64 offset, u64 length,
477 u64 truncate_size, u32 truncate_seq) 481 u64 truncate_size, u32 truncate_seq)
478{ 482{
479 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 483 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
484 opcode, 0);
480 size_t payload_len = 0; 485 size_t payload_len = 0;
481 486
482 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && 487 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
@@ -515,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update);
515void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 520void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
516 u16 opcode, const char *class, const char *method) 521 u16 opcode, const char *class, const char *method)
517{ 522{
518 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 523 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
524 opcode, 0);
519 struct ceph_pagelist *pagelist; 525 struct ceph_pagelist *pagelist;
520 size_t payload_len = 0; 526 size_t payload_len = 0;
521 size_t size; 527 size_t size;
@@ -552,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
552 u16 opcode, const char *name, const void *value, 558 u16 opcode, const char *name, const void *value,
553 size_t size, u8 cmp_op, u8 cmp_mode) 559 size_t size, u8 cmp_op, u8 cmp_mode)
554{ 560{
555 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 561 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
562 opcode, 0);
556 struct ceph_pagelist *pagelist; 563 struct ceph_pagelist *pagelist;
557 size_t payload_len; 564 size_t payload_len;
558 565
@@ -585,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
585 unsigned int which, u16 opcode, 592 unsigned int which, u16 opcode,
586 u64 cookie, u64 version, int flag) 593 u64 cookie, u64 version, int flag)
587{ 594{
588 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 595 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
596 opcode, 0);
589 597
590 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); 598 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
591 599
@@ -602,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
602 u64 expected_write_size) 610 u64 expected_write_size)
603{ 611{
604 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, 612 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
605 CEPH_OSD_OP_SETALLOCHINT); 613 CEPH_OSD_OP_SETALLOCHINT,
614 0);
606 615
607 op->alloc_hint.expected_object_size = expected_object_size; 616 op->alloc_hint.expected_object_size = expected_object_size;
608 op->alloc_hint.expected_write_size = expected_write_size; 617 op->alloc_hint.expected_write_size = expected_write_size;
@@ -786,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
786 } 795 }
787 796
788 if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { 797 if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
789 osd_req_op_init(req, which, opcode); 798 osd_req_op_init(req, which, opcode, 0);
790 } else { 799 } else {
791 u32 object_size = le32_to_cpu(layout->fl_object_size); 800 u32 object_size = le32_to_cpu(layout->fl_object_size);
792 u32 object_base = off - objoff; 801 u32 object_base = off - objoff;
@@ -1088,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
1088 BUG_ON(!list_empty(&osd->o_osd_lru)); 1097 BUG_ON(!list_empty(&osd->o_osd_lru));
1089 1098
1090 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); 1099 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
1091 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; 1100 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
1092} 1101}
1093 1102
1094static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, 1103static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@@ -1199,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
1199static void __schedule_osd_timeout(struct ceph_osd_client *osdc) 1208static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
1200{ 1209{
1201 schedule_delayed_work(&osdc->timeout_work, 1210 schedule_delayed_work(&osdc->timeout_work,
1202 osdc->client->options->osd_keepalive_timeout * HZ); 1211 osdc->client->options->osd_keepalive_timeout);
1203} 1212}
1204 1213
1205static void __cancel_osd_timeout(struct ceph_osd_client *osdc) 1214static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -1567,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
1567{ 1576{
1568 struct ceph_osd_client *osdc = 1577 struct ceph_osd_client *osdc =
1569 container_of(work, struct ceph_osd_client, timeout_work.work); 1578 container_of(work, struct ceph_osd_client, timeout_work.work);
1579 struct ceph_options *opts = osdc->client->options;
1570 struct ceph_osd_request *req; 1580 struct ceph_osd_request *req;
1571 struct ceph_osd *osd; 1581 struct ceph_osd *osd;
1572 unsigned long keepalive =
1573 osdc->client->options->osd_keepalive_timeout * HZ;
1574 struct list_head slow_osds; 1582 struct list_head slow_osds;
1575 dout("timeout\n"); 1583 dout("timeout\n");
1576 down_read(&osdc->map_sem); 1584 down_read(&osdc->map_sem);
@@ -1586,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
1586 */ 1594 */
1587 INIT_LIST_HEAD(&slow_osds); 1595 INIT_LIST_HEAD(&slow_osds);
1588 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { 1596 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
1589 if (time_before(jiffies, req->r_stamp + keepalive)) 1597 if (time_before(jiffies,
1598 req->r_stamp + opts->osd_keepalive_timeout))
1590 break; 1599 break;
1591 1600
1592 osd = req->r_osd; 1601 osd = req->r_osd;
@@ -1613,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
1613 struct ceph_osd_client *osdc = 1622 struct ceph_osd_client *osdc =
1614 container_of(work, struct ceph_osd_client, 1623 container_of(work, struct ceph_osd_client,
1615 osds_timeout_work.work); 1624 osds_timeout_work.work);
1616 unsigned long delay = 1625 unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
1617 osdc->client->options->osd_idle_ttl * HZ >> 2;
1618 1626
1619 dout("osds timeout\n"); 1627 dout("osds timeout\n");
1620 down_read(&osdc->map_sem); 1628 down_read(&osdc->map_sem);
@@ -2619,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
2619 osdc->event_count = 0; 2627 osdc->event_count = 0;
2620 2628
2621 schedule_delayed_work(&osdc->osds_timeout_work, 2629 schedule_delayed_work(&osdc->osds_timeout_work,
2622 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); 2630 round_jiffies_relative(osdc->client->options->osd_idle_ttl));
2623 2631
2624 err = -ENOMEM; 2632 err = -ENOMEM;
2625 osdc->req_mempool = mempool_create_kmalloc_pool(10, 2633 osdc->req_mempool = mempool_create_kmalloc_pool(10,
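
All of the osd_client hunks above are one change: osd_idle_ttl and osd_keepalive_timeout are now stored in jiffies rather than seconds, so every "* HZ" (and the "HZ >> 2" quarter-TTL shift, now a plain "/ 4") disappears from the use sites. Under that assumption the option parser converts once and the users just add jiffies to jiffies, roughly:

/* illustrative: convert once at option-parse time ... */
opt->osd_keepalive_timeout = msecs_to_jiffies(keepalive_secs * MSEC_PER_SEC);

/* ... then compare directly, with no HZ scaling at the use site */
if (time_before(jiffies, req->r_stamp + opt->osd_keepalive_timeout))
        break;
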
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 15796696d64e..4a3125836b64 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end,
89{ 89{
90 int j; 90 int j;
91 dout("crush_decode_tree_bucket %p to %p\n", *p, end); 91 dout("crush_decode_tree_bucket %p to %p\n", *p, end);
92 ceph_decode_32_safe(p, end, b->num_nodes, bad); 92 ceph_decode_8_safe(p, end, b->num_nodes, bad);
93 b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); 93 b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
94 if (b->node_weights == NULL) 94 if (b->node_weights == NULL)
95 return -ENOMEM; 95 return -ENOMEM;
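
num_nodes is a __u8 in struct crush_bucket_tree, so decoding it with the 32-bit helper both over-read the input and scribbled past the field; the 8-bit variant fixes the width. The *_safe decode macros bounds-check before reading and bail to the error label, along these lines:

u8 num_nodes;

if ((size_t)(end - *p) < sizeof(num_nodes))   /* enough bytes left? */
        goto bad;
num_nodes = *(const u8 *)*p;                  /* read, then advance */
*p += sizeof(num_nodes);
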
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 096d91447e06..d4f5f220a8e5 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
51 set_page_dirty_lock(pages[i]); 51 set_page_dirty_lock(pages[i]);
52 put_page(pages[i]); 52 put_page(pages[i]);
53 } 53 }
54 if (is_vmalloc_addr(pages)) 54 kvfree(pages);
55 vfree(pages);
56 else
57 kfree(pages);
58} 55}
59EXPORT_SYMBOL(ceph_put_page_vector); 56EXPORT_SYMBOL(ceph_put_page_vector);
60 57
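
kvfree() folds the deleted vmalloc-or-kmalloc branch into a single call; its implementation in mm/util.c is essentially the code being removed:

void kvfree(const void *addr)
{
        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);
}
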
diff --git a/net/core/dev.c b/net/core/dev.c
index aa82f9ab6a36..a8e4dd430285 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
135#include <linux/if_macvlan.h> 135#include <linux/if_macvlan.h>
136#include <linux/errqueue.h> 136#include <linux/errqueue.h>
137#include <linux/hrtimer.h> 137#include <linux/hrtimer.h>
138#include <linux/netfilter_ingress.h>
138 139
139#include "net-sysfs.h" 140#include "net-sysfs.h"
140 141
@@ -468,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack);
468 */ 469 */
469void dev_add_offload(struct packet_offload *po) 470void dev_add_offload(struct packet_offload *po)
470{ 471{
471 struct list_head *head = &offload_base; 472 struct packet_offload *elem;
472 473
473 spin_lock(&offload_lock); 474 spin_lock(&offload_lock);
474 list_add_rcu(&po->list, head); 475 list_for_each_entry(elem, &offload_base, list) {
476 if (po->priority < elem->priority)
477 break;
478 }
479 list_add_rcu(&po->list, elem->list.prev);
475 spin_unlock(&offload_lock); 480 spin_unlock(&offload_lock);
476} 481}
477EXPORT_SYMBOL(dev_add_offload); 482EXPORT_SYMBOL(dev_add_offload);
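
dev_add_offload() now keeps the offload list sorted by ascending ->priority: the walk stops at the first entry with a larger priority, and list_add_rcu() on elem->list.prev splices the newcomer in front of it, or at the tail when the loop runs off the end. Registering a high-priority offload then looks like the usual pattern with the extra field set (values illustrative):

static struct packet_offload my_offload __read_mostly = {
        .type     = cpu_to_be16(ETH_P_IP),   /* illustrative */
        .priority = 5,       /* lower number == earlier in the list */
        /* .callbacks = { .gro_receive = ..., .gro_complete = ... }, */
};

dev_add_offload(&my_offload);
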
@@ -672,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev)
672 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) 677 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
673 return dev->netdev_ops->ndo_get_iflink(dev); 678 return dev->netdev_ops->ndo_get_iflink(dev);
674 679
675 /* If dev->rtnl_link_ops is set, it's a virtual interface. */
676 if (dev->rtnl_link_ops)
677 return 0;
678
679 return dev->ifindex; 680 return dev->ifindex;
680} 681}
681EXPORT_SYMBOL(dev_get_iflink); 682EXPORT_SYMBOL(dev_get_iflink);
@@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1630} 1631}
1631EXPORT_SYMBOL(call_netdevice_notifiers); 1632EXPORT_SYMBOL(call_netdevice_notifiers);
1632 1633
1633#ifdef CONFIG_NET_CLS_ACT 1634#ifdef CONFIG_NET_INGRESS
1634static struct static_key ingress_needed __read_mostly; 1635static struct static_key ingress_needed __read_mostly;
1635 1636
1636void net_inc_ingress_queue(void) 1637void net_inc_ingress_queue(void)
@@ -2343,6 +2344,34 @@ void netif_device_attach(struct net_device *dev)
2343} 2344}
2344EXPORT_SYMBOL(netif_device_attach); 2345EXPORT_SYMBOL(netif_device_attach);
2345 2346
2347/*
2348 * Returns a Tx hash based on the given packet descriptor and the number
2349 * of Tx queues to be used as a distribution range.
2350 */
2351u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
2352 unsigned int num_tx_queues)
2353{
2354 u32 hash;
2355 u16 qoffset = 0;
2356 u16 qcount = num_tx_queues;
2357
2358 if (skb_rx_queue_recorded(skb)) {
2359 hash = skb_get_rx_queue(skb);
2360 while (unlikely(hash >= num_tx_queues))
2361 hash -= num_tx_queues;
2362 return hash;
2363 }
2364
2365 if (dev->num_tc) {
2366 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2367 qoffset = dev->tc_to_txq[tc].offset;
2368 qcount = dev->tc_to_txq[tc].count;
2369 }
2370
2371 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
2372}
2373EXPORT_SYMBOL(__skb_tx_hash);
2374
2346static void skb_warn_bad_offload(const struct sk_buff *skb) 2375static void skb_warn_bad_offload(const struct sk_buff *skb)
2347{ 2376{
2348 static const netdev_features_t null_features = 0; 2377 static const netdev_features_t null_features = 0;
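
__skb_tx_hash() first honors a recorded rx queue, then narrows the range to the traffic class's [qoffset, qoffset + qcount) slice when the device uses TCs, and finally spreads the flow hash across qcount queues with reciprocal_scale(), which maps a 32-bit value onto [0, n) with a multiply-and-shift instead of a modulo; the helper in linux/kernel.h is:

static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
        /* (val * ep_ro) / 2^32, i.e. val scaled into [0, ep_ro) */
        return (u32)(((u64)val * ep_ro) >> 32);
}
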
@@ -2901,6 +2930,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
2901} 2930}
2902EXPORT_SYMBOL(dev_loopback_xmit); 2931EXPORT_SYMBOL(dev_loopback_xmit);
2903 2932
2933static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2934{
2935#ifdef CONFIG_XPS
2936 struct xps_dev_maps *dev_maps;
2937 struct xps_map *map;
2938 int queue_index = -1;
2939
2940 rcu_read_lock();
2941 dev_maps = rcu_dereference(dev->xps_maps);
2942 if (dev_maps) {
2943 map = rcu_dereference(
2944 dev_maps->cpu_map[skb->sender_cpu - 1]);
2945 if (map) {
2946 if (map->len == 1)
2947 queue_index = map->queues[0];
2948 else
2949 queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
2950 map->len)];
2951 if (unlikely(queue_index >= dev->real_num_tx_queues))
2952 queue_index = -1;
2953 }
2954 }
2955 rcu_read_unlock();
2956
2957 return queue_index;
2958#else
2959 return -1;
2960#endif
2961}
2962
2963static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
2964{
2965 struct sock *sk = skb->sk;
2966 int queue_index = sk_tx_queue_get(sk);
2967
2968 if (queue_index < 0 || skb->ooo_okay ||
2969 queue_index >= dev->real_num_tx_queues) {
2970 int new_index = get_xps_queue(dev, skb);
2971 if (new_index < 0)
2972 new_index = skb_tx_hash(dev, skb);
2973
2974 if (queue_index != new_index && sk &&
2975 rcu_access_pointer(sk->sk_dst_cache))
2976 sk_tx_queue_set(sk, new_index);
2977
2978 queue_index = new_index;
2979 }
2980
2981 return queue_index;
2982}
2983
2984struct netdev_queue *netdev_pick_tx(struct net_device *dev,
2985 struct sk_buff *skb,
2986 void *accel_priv)
2987{
2988 int queue_index = 0;
2989
2990#ifdef CONFIG_XPS
2991 if (skb->sender_cpu == 0)
2992 skb->sender_cpu = raw_smp_processor_id() + 1;
2993#endif
2994
2995 if (dev->real_num_tx_queues != 1) {
2996 const struct net_device_ops *ops = dev->netdev_ops;
2997 if (ops->ndo_select_queue)
2998 queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
2999 __netdev_pick_tx);
3000 else
3001 queue_index = __netdev_pick_tx(dev, skb);
3002
3003 if (!accel_priv)
3004 queue_index = netdev_cap_txqueue(dev, queue_index);
3005 }
3006
3007 skb_set_queue_mapping(skb, queue_index);
3008 return netdev_get_tx_queue(dev, queue_index);
3009}
3010
2904/** 3011/**
2905 * __dev_queue_xmit - transmit a buffer 3012 * __dev_queue_xmit - transmit a buffer
2906 * @skb: buffer to transmit 3013 * @skb: buffer to transmit
@@ -3341,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3341 local_irq_save(flags); 3448 local_irq_save(flags);
3342 3449
3343 rps_lock(sd); 3450 rps_lock(sd);
3451 if (!netif_running(skb->dev))
3452 goto drop;
3344 qlen = skb_queue_len(&sd->input_pkt_queue); 3453 qlen = skb_queue_len(&sd->input_pkt_queue);
3345 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { 3454 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3346 if (qlen) { 3455 if (qlen) {
@@ -3362,6 +3471,7 @@ enqueue:
3362 goto enqueue; 3471 goto enqueue;
3363 } 3472 }
3364 3473
3474drop:
3365 sd->dropped++; 3475 sd->dropped++;
3366 rps_unlock(sd); 3476 rps_unlock(sd);
3367 3477
@@ -3513,66 +3623,47 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
3513EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 3623EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3514#endif 3624#endif
3515 3625
3516#ifdef CONFIG_NET_CLS_ACT
3517/* TODO: Maybe we should just force sch_ingress to be compiled in
3518 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
3519 * a compare and 2 stores extra right now if we dont have it on
3520 * but have CONFIG_NET_CLS_ACT
3521 * NOTE: This doesn't stop any functionality; if you dont have
3522 * the ingress scheduler, you just can't add policies on ingress.
3523 *
3524 */
3525static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3526{
3527 struct net_device *dev = skb->dev;
3528 u32 ttl = G_TC_RTTL(skb->tc_verd);
3529 int result = TC_ACT_OK;
3530 struct Qdisc *q;
3531
3532 if (unlikely(MAX_RED_LOOP < ttl++)) {
3533 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3534 skb->skb_iif, dev->ifindex);
3535 return TC_ACT_SHOT;
3536 }
3537
3538 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3539 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3540
3541 q = rcu_dereference(rxq->qdisc);
3542 if (q != &noop_qdisc) {
3543 spin_lock(qdisc_lock(q));
3544 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3545 result = qdisc_enqueue_root(skb, q);
3546 spin_unlock(qdisc_lock(q));
3547 }
3548
3549 return result;
3550}
3551
3552static inline struct sk_buff *handle_ing(struct sk_buff *skb, 3626static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3553 struct packet_type **pt_prev, 3627 struct packet_type **pt_prev,
3554 int *ret, struct net_device *orig_dev) 3628 int *ret, struct net_device *orig_dev)
3555{ 3629{
3556 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); 3630#ifdef CONFIG_NET_CLS_ACT
3631 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
3632 struct tcf_result cl_res;
3557 3633
3558 if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) 3634 /* If there's at least one ingress present somewhere (so
3635 * we get here via enabled static key), remaining devices
3636 * that are not configured with an ingress qdisc will bail
3637 * out here.
3638 */
3639 if (!cl)
3559 return skb; 3640 return skb;
3560
3561 if (*pt_prev) { 3641 if (*pt_prev) {
3562 *ret = deliver_skb(skb, *pt_prev, orig_dev); 3642 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3563 *pt_prev = NULL; 3643 *pt_prev = NULL;
3564 } 3644 }
3565 3645
3566 switch (ing_filter(skb, rxq)) { 3646 qdisc_skb_cb(skb)->pkt_len = skb->len;
3647 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3648 qdisc_bstats_update_cpu(cl->q, skb);
3649
3650 switch (tc_classify(skb, cl, &cl_res)) {
3651 case TC_ACT_OK:
3652 case TC_ACT_RECLASSIFY:
3653 skb->tc_index = TC_H_MIN(cl_res.classid);
3654 break;
3567 case TC_ACT_SHOT: 3655 case TC_ACT_SHOT:
3656 qdisc_qstats_drop_cpu(cl->q);
3568 case TC_ACT_STOLEN: 3657 case TC_ACT_STOLEN:
3658 case TC_ACT_QUEUED:
3569 kfree_skb(skb); 3659 kfree_skb(skb);
3570 return NULL; 3660 return NULL;
3661 default:
3662 break;
3571 } 3663 }
3572 3664#endif /* CONFIG_NET_CLS_ACT */
3573 return skb; 3665 return skb;
3574} 3666}
3575#endif
3576 3667
3577/** 3668/**
3578 * netdev_rx_handler_register - register receive handler 3669 * netdev_rx_handler_register - register receive handler
@@ -3645,6 +3736,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3645 } 3736 }
3646} 3737}
3647 3738
3739static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
3740 int *ret, struct net_device *orig_dev)
3741{
3742#ifdef CONFIG_NETFILTER_INGRESS
3743 if (nf_hook_ingress_active(skb)) {
3744 if (*pt_prev) {
3745 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3746 *pt_prev = NULL;
3747 }
3748
3749 return nf_hook_ingress(skb);
3750 }
3751#endif /* CONFIG_NETFILTER_INGRESS */
3752 return 0;
3753}
3754
3648static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) 3755static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3649{ 3756{
3650 struct packet_type *ptype, *pt_prev; 3757 struct packet_type *ptype, *pt_prev;
@@ -3667,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3667 3774
3668 pt_prev = NULL; 3775 pt_prev = NULL;
3669 3776
3670 rcu_read_lock();
3671
3672another_round: 3777another_round:
3673 skb->skb_iif = skb->dev->ifindex; 3778 skb->skb_iif = skb->dev->ifindex;
3674 3779
@@ -3678,7 +3783,7 @@ another_round:
3678 skb->protocol == cpu_to_be16(ETH_P_8021AD)) { 3783 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3679 skb = skb_vlan_untag(skb); 3784 skb = skb_vlan_untag(skb);
3680 if (unlikely(!skb)) 3785 if (unlikely(!skb))
3681 goto unlock; 3786 goto out;
3682 } 3787 }
3683 3788
3684#ifdef CONFIG_NET_CLS_ACT 3789#ifdef CONFIG_NET_CLS_ACT
@@ -3704,13 +3809,17 @@ another_round:
3704 } 3809 }
3705 3810
3706skip_taps: 3811skip_taps:
3707#ifdef CONFIG_NET_CLS_ACT 3812#ifdef CONFIG_NET_INGRESS
3708 if (static_key_false(&ingress_needed)) { 3813 if (static_key_false(&ingress_needed)) {
3709 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 3814 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3710 if (!skb) 3815 if (!skb)
3711 goto unlock; 3816 goto out;
3712 }
3713 3817
3818 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
3819 goto out;
3820 }
3821#endif
3822#ifdef CONFIG_NET_CLS_ACT
3714 skb->tc_verd = 0; 3823 skb->tc_verd = 0;
3715ncls: 3824ncls:
3716#endif 3825#endif
@@ -3725,7 +3834,7 @@ ncls:
3725 if (vlan_do_receive(&skb)) 3834 if (vlan_do_receive(&skb))
3726 goto another_round; 3835 goto another_round;
3727 else if (unlikely(!skb)) 3836 else if (unlikely(!skb))
3728 goto unlock; 3837 goto out;
3729 } 3838 }
3730 3839
3731 rx_handler = rcu_dereference(skb->dev->rx_handler); 3840 rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ -3737,7 +3846,7 @@ ncls:
3737 switch (rx_handler(&skb)) { 3846 switch (rx_handler(&skb)) {
3738 case RX_HANDLER_CONSUMED: 3847 case RX_HANDLER_CONSUMED:
3739 ret = NET_RX_SUCCESS; 3848 ret = NET_RX_SUCCESS;
3740 goto unlock; 3849 goto out;
3741 case RX_HANDLER_ANOTHER: 3850 case RX_HANDLER_ANOTHER:
3742 goto another_round; 3851 goto another_round;
3743 case RX_HANDLER_EXACT: 3852 case RX_HANDLER_EXACT:
@@ -3791,8 +3900,7 @@ drop:
3791 ret = NET_RX_DROP; 3900 ret = NET_RX_DROP;
3792 } 3901 }
3793 3902
3794unlock: 3903out:
3795 rcu_read_unlock();
3796 return ret; 3904 return ret;
3797} 3905}
3798 3906
@@ -3823,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb)
3823 3931
3824static int netif_receive_skb_internal(struct sk_buff *skb) 3932static int netif_receive_skb_internal(struct sk_buff *skb)
3825{ 3933{
3934 int ret;
3935
3826 net_timestamp_check(netdev_tstamp_prequeue, skb); 3936 net_timestamp_check(netdev_tstamp_prequeue, skb);
3827 3937
3828 if (skb_defer_rx_timestamp(skb)) 3938 if (skb_defer_rx_timestamp(skb))
3829 return NET_RX_SUCCESS; 3939 return NET_RX_SUCCESS;
3830 3940
3941 rcu_read_lock();
3942
3831#ifdef CONFIG_RPS 3943#ifdef CONFIG_RPS
3832 if (static_key_false(&rps_needed)) { 3944 if (static_key_false(&rps_needed)) {
3833 struct rps_dev_flow voidflow, *rflow = &voidflow; 3945 struct rps_dev_flow voidflow, *rflow = &voidflow;
3834 int cpu, ret; 3946 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
3835
3836 rcu_read_lock();
3837
3838 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3839 3947
3840 if (cpu >= 0) { 3948 if (cpu >= 0) {
3841 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3949 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3842 rcu_read_unlock(); 3950 rcu_read_unlock();
3843 return ret; 3951 return ret;
3844 } 3952 }
3845 rcu_read_unlock();
3846 } 3953 }
3847#endif 3954#endif
3848 return __netif_receive_skb(skb); 3955 ret = __netif_receive_skb(skb);
3956 rcu_read_unlock();
3957 return ret;
3849} 3958}
3850 3959
3851/** 3960/**
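
The goto-label rename from "unlock" to "out" is the visible edge of a locking change: rcu_read_lock() moves out of __netif_receive_skb_core() and up into its callers, here and in process_backlog() below, so one read-side section owned by the entry point now covers the whole receive path, RPS dispatch included. Each caller ends up shaped like this sketch:

rcu_read_lock();
ret = __netif_receive_skb(skb);   /* core no longer locks internally */
rcu_read_unlock();
return ret;
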
@@ -4390,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
4390 struct sk_buff *skb; 4499 struct sk_buff *skb;
4391 4500
4392 while ((skb = __skb_dequeue(&sd->process_queue))) { 4501 while ((skb = __skb_dequeue(&sd->process_queue))) {
4502 rcu_read_lock();
4393 local_irq_enable(); 4503 local_irq_enable();
4394 __netif_receive_skb(skb); 4504 __netif_receive_skb(skb);
4505 rcu_read_unlock();
4395 local_irq_disable(); 4506 local_irq_disable();
4396 input_queue_head_incr(sd); 4507 input_queue_head_incr(sd);
4397 if (++work >= quota) { 4508 if (++work >= quota) {
@@ -6027,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head)
6027 unlist_netdevice(dev); 6138 unlist_netdevice(dev);
6028 6139
6029 dev->reg_state = NETREG_UNREGISTERING; 6140 dev->reg_state = NETREG_UNREGISTERING;
6141 on_each_cpu(flush_backlog, dev, 1);
6030 } 6142 }
6031 6143
6032 synchronize_net(); 6144 synchronize_net();
@@ -6297,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
6297 struct netdev_queue *tx; 6409 struct netdev_queue *tx;
6298 size_t sz = count * sizeof(*tx); 6410 size_t sz = count * sizeof(*tx);
6299 6411
6300 BUG_ON(count < 1 || count > 0xffff); 6412 if (count < 1 || count > 0xffff)
6413 return -EINVAL;
6301 6414
6302 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 6415 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6303 if (!tx) { 6416 if (!tx) {
@@ -6313,6 +6426,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
6313 return 0; 6426 return 0;
6314} 6427}
6315 6428
6429void netif_tx_stop_all_queues(struct net_device *dev)
6430{
6431 unsigned int i;
6432
6433 for (i = 0; i < dev->num_tx_queues; i++) {
6434 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
6435 netif_tx_stop_queue(txq);
6436 }
6437}
6438EXPORT_SYMBOL(netif_tx_stop_all_queues);
6439
6316/** 6440/**
6317 * register_netdevice - register a network device 6441 * register_netdevice - register a network device
6318 * @dev: device to register 6442 * @dev: device to register
@@ -6650,8 +6774,6 @@ void netdev_run_todo(void)
6650 6774
6651 dev->reg_state = NETREG_UNREGISTERED; 6775 dev->reg_state = NETREG_UNREGISTERED;
6652 6776
6653 on_each_cpu(flush_backlog, dev, 1);
6654
6655 netdev_wait_allrefs(dev); 6777 netdev_wait_allrefs(dev);
6656 6778
6657 /* paranoia */ 6779 /* paranoia */
@@ -6862,6 +6984,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6862 dev->group = INIT_NETDEV_GROUP; 6984 dev->group = INIT_NETDEV_GROUP;
6863 if (!dev->ethtool_ops) 6985 if (!dev->ethtool_ops)
6864 dev->ethtool_ops = &default_ethtool_ops; 6986 dev->ethtool_ops = &default_ethtool_ops;
6987
6988 nf_hook_ingress_init(dev);
6989
6865 return dev; 6990 return dev;
6866 6991
6867free_all: 6992free_all:
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1d00b8922902..b495ab1797fa 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
98 [NETIF_F_RXALL_BIT] = "rx-all", 98 [NETIF_F_RXALL_BIT] = "rx-all",
99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", 99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
100 [NETIF_F_BUSY_POLL_BIT] = "busy-poll", 100 [NETIF_F_BUSY_POLL_BIT] = "busy-poll",
101 [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload",
102}; 101};
103 102
104static const char 103static const char
@@ -107,6 +106,13 @@ rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
107 [ETH_RSS_HASH_XOR_BIT] = "xor", 106 [ETH_RSS_HASH_XOR_BIT] = "xor",
108}; 107};
109 108
109static const char
110tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
111 [ETHTOOL_ID_UNSPEC] = "Unspec",
112 [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
113 [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
114};
115
110static int ethtool_get_features(struct net_device *dev, void __user *useraddr) 116static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
111{ 117{
112 struct ethtool_gfeatures cmd = { 118 struct ethtool_gfeatures cmd = {
@@ -195,6 +201,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
195 if (sset == ETH_SS_RSS_HASH_FUNCS) 201 if (sset == ETH_SS_RSS_HASH_FUNCS)
196 return ARRAY_SIZE(rss_hash_func_strings); 202 return ARRAY_SIZE(rss_hash_func_strings);
197 203
204 if (sset == ETH_SS_TUNABLES)
205 return ARRAY_SIZE(tunable_strings);
206
198 if (ops->get_sset_count && ops->get_strings) 207 if (ops->get_sset_count && ops->get_strings)
199 return ops->get_sset_count(dev, sset); 208 return ops->get_sset_count(dev, sset);
200 else 209 else
@@ -212,6 +221,8 @@ static void __ethtool_get_strings(struct net_device *dev,
212 else if (stringset == ETH_SS_RSS_HASH_FUNCS) 221 else if (stringset == ETH_SS_RSS_HASH_FUNCS)
213 memcpy(data, rss_hash_func_strings, 222 memcpy(data, rss_hash_func_strings,
214 sizeof(rss_hash_func_strings)); 223 sizeof(rss_hash_func_strings));
224 else if (stringset == ETH_SS_TUNABLES)
225 memcpy(data, tunable_strings, sizeof(tunable_strings));
215 else 226 else
216 /* ops->get_strings is valid because checked earlier */ 227 /* ops->get_strings is valid because checked earlier */
217 ops->get_strings(dev, stringset, data); 228 ops->get_strings(dev, stringset, data);
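
With the tunables string set wired into __ethtool_get_sset_count() and __ethtool_get_strings(), userspace can enumerate it through the standard ETHTOOL_GSSET_INFO and ETHTOOL_GSTRINGS ioctls. A hedged userspace sketch, not part of the patch (error handling trimmed):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static void list_tunables(const char *ifname)
{
        struct { struct ethtool_sset_info hdr; __u32 count; } info = {
                .hdr = { .cmd = ETHTOOL_GSSET_INFO,
                         .sset_mask = 1ULL << ETH_SS_TUNABLES },
        };
        struct ethtool_gstrings *strs;
        struct ifreq ifr = { 0 };
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        __u32 i;

        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&info;
        if (ioctl(fd, SIOCETHTOOL, &ifr) < 0 || !info.hdr.sset_mask)
                goto out;                /* set not supported */

        strs = calloc(1, sizeof(*strs) + info.count * ETH_GSTRING_LEN);
        strs->cmd = ETHTOOL_GSTRINGS;
        strs->string_set = ETH_SS_TUNABLES;
        strs->len = info.count;
        ifr.ifr_data = (void *)strs;
        if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
                for (i = 0; i < strs->len; i++)
                        printf("%.*s\n", ETH_GSTRING_LEN,
                               (char *)strs->data + i * ETH_GSTRING_LEN);
        free(strs);
out:
        close(fd);
}
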
diff --git a/net/core/filter.c b/net/core/filter.c
index bf831a85c315..be3098fb65e4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,6 +36,7 @@
36#include <net/netlink.h> 36#include <net/netlink.h>
37#include <linux/skbuff.h> 37#include <linux/skbuff.h>
38#include <net/sock.h> 38#include <net/sock.h>
39#include <net/flow_dissector.h>
39#include <linux/errno.h> 40#include <linux/errno.h>
40#include <linux/timer.h> 41#include <linux/timer.h>
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -45,6 +46,7 @@
45#include <linux/seccomp.h> 46#include <linux/seccomp.h>
46#include <linux/if_vlan.h> 47#include <linux/if_vlan.h>
47#include <linux/bpf.h> 48#include <linux/bpf.h>
49#include <net/sch_generic.h>
48 50
49/** 51/**
50 * sk_filter - run a packet through a socket filter 52 * sk_filter - run a packet through a socket filter
@@ -355,8 +357,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
355 * for socket filters: ctx == 'struct sk_buff *', for seccomp: 357 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
356 * ctx == 'struct seccomp_data *'. 358 * ctx == 'struct seccomp_data *'.
357 */ 359 */
358int bpf_convert_filter(struct sock_filter *prog, int len, 360static int bpf_convert_filter(struct sock_filter *prog, int len,
359 struct bpf_insn *new_prog, int *new_len) 361 struct bpf_insn *new_prog, int *new_len)
360{ 362{
361 int new_flen = 0, pass = 0, target, i; 363 int new_flen = 0, pass = 0, target, i;
362 struct bpf_insn *new_insn; 364 struct bpf_insn *new_insn;
@@ -371,7 +373,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len,
371 return -EINVAL; 373 return -EINVAL;
372 374
373 if (new_prog) { 375 if (new_prog) {
374 addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL); 376 addrs = kcalloc(len, sizeof(*addrs),
377 GFP_KERNEL | __GFP_NOWARN);
375 if (!addrs) 378 if (!addrs)
376 return -ENOMEM; 379 return -ENOMEM;
377 } 380 }
@@ -751,7 +754,8 @@ static bool chk_code_allowed(u16 code_to_probe)
751 * 754 *
752 * Returns 0 if the rule set is legal or -EINVAL if not. 755 * Returns 0 if the rule set is legal or -EINVAL if not.
753 */ 756 */
754int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) 757static int bpf_check_classic(const struct sock_filter *filter,
758 unsigned int flen)
755{ 759{
756 bool anc_found; 760 bool anc_found;
757 int pc; 761 int pc;
@@ -825,7 +829,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
825 829
826 return -EINVAL; 830 return -EINVAL;
827} 831}
828EXPORT_SYMBOL(bpf_check_classic);
829 832
830static int bpf_prog_store_orig_filter(struct bpf_prog *fp, 833static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
831 const struct sock_fprog *fprog) 834 const struct sock_fprog *fprog)
@@ -839,7 +842,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
839 842
840 fkprog = fp->orig_prog; 843 fkprog = fp->orig_prog;
841 fkprog->len = fprog->len; 844 fkprog->len = fprog->len;
842 fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); 845
846 fkprog->filter = kmemdup(fp->insns, fsize,
847 GFP_KERNEL | __GFP_NOWARN);
843 if (!fkprog->filter) { 848 if (!fkprog->filter) {
844 kfree(fp->orig_prog); 849 kfree(fp->orig_prog);
845 return -ENOMEM; 850 return -ENOMEM;
@@ -941,7 +946,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
941 * pass. At this time, the user BPF is stored in fp->insns. 946 * pass. At this time, the user BPF is stored in fp->insns.
942 */ 947 */
943 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), 948 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
944 GFP_KERNEL); 949 GFP_KERNEL | __GFP_NOWARN);
945 if (!old_prog) { 950 if (!old_prog) {
946 err = -ENOMEM; 951 err = -ENOMEM;
947 goto out_err; 952 goto out_err;
@@ -988,7 +993,8 @@ out_err:
988 return ERR_PTR(err); 993 return ERR_PTR(err);
989} 994}
990 995
991static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) 996static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
997 bpf_aux_classic_check_t trans)
992{ 998{
993 int err; 999 int err;
994 1000
@@ -1001,6 +1007,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
1001 return ERR_PTR(err); 1007 return ERR_PTR(err);
1002 } 1008 }
1003 1009
1010 /* There might be additional checks and transformations
1011 * needed on classic filters, e.g. in the case of seccomp.
1012 */
1013 if (trans) {
1014 err = trans(fp->insns, fp->len);
1015 if (err) {
1016 __bpf_prog_release(fp);
1017 return ERR_PTR(err);
1018 }
1019 }
1020
1004 /* Probe if we can JIT compile the filter and if so, do 1021 /* Probe if we can JIT compile the filter and if so, do
1005 * the compilation of the filter. 1022 * the compilation of the filter.
1006 */ 1023 */
@@ -1050,7 +1067,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1050 /* bpf_prepare_filter() already takes care of freeing 1067 /* bpf_prepare_filter() already takes care of freeing
1051 * memory in case something goes wrong. 1068 * memory in case something goes wrong.
1052 */ 1069 */
1053 fp = bpf_prepare_filter(fp); 1070 fp = bpf_prepare_filter(fp, NULL);
1054 if (IS_ERR(fp)) 1071 if (IS_ERR(fp))
1055 return PTR_ERR(fp); 1072 return PTR_ERR(fp);
1056 1073
@@ -1059,6 +1076,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1059} 1076}
1060EXPORT_SYMBOL_GPL(bpf_prog_create); 1077EXPORT_SYMBOL_GPL(bpf_prog_create);
1061 1078
1079/**
1080 * bpf_prog_create_from_user - create an unattached filter from user buffer
1081 * @pfp: the unattached filter that is created
1082 * @fprog: the filter program
1083 * @trans: post-classic verifier transformation handler
1084 *
1085 * This function effectively does the same as bpf_prog_create(), only
1086 * that it builds up its insns buffer from a user-space-provided buffer.
1087 * It also allows for passing a bpf_aux_classic_check_t handler.
1088 */
1089int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1090 bpf_aux_classic_check_t trans)
1091{
1092 unsigned int fsize = bpf_classic_proglen(fprog);
1093 struct bpf_prog *fp;
1094
1095 /* Make sure the new filter is there and of a sane size. */
1096 if (fprog->filter == NULL)
1097 return -EINVAL;
1098
1099 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1100 if (!fp)
1101 return -ENOMEM;
1102
1103 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1104 __bpf_prog_free(fp);
1105 return -EFAULT;
1106 }
1107
1108 fp->len = fprog->len;
1109 /* Since unattached filters are not copied back to user
1110 * space through sk_get_filter(), we do not need to hold
1111 * a copy here, which spares us the work.
1112 */
1113 fp->orig_prog = NULL;
1114
1115 /* bpf_prepare_filter() already takes care of freeing
1116 * memory in case something goes wrong.
1117 */
1118 fp = bpf_prepare_filter(fp, trans);
1119 if (IS_ERR(fp))
1120 return PTR_ERR(fp);
1121
1122 *pfp = fp;
1123 return 0;
1124}
1125
1062void bpf_prog_destroy(struct bpf_prog *fp) 1126void bpf_prog_destroy(struct bpf_prog *fp)
1063{ 1127{
1064 __bpf_prog_release(fp); 1128 __bpf_prog_release(fp);
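
A caller-side sketch of the new entry point: the program is built from a user-space struct sock_fprog, and the optional trans hook gets to run an extra validation (or rewriting) pass after bpf_check_classic(), which is the hole a user such as seccomp needs. The checker below is purely illustrative:

static int my_extra_check(struct sock_filter *filter, unsigned int flen)
{
        unsigned int pc;

        /* illustrative policy: forbid absolute packet loads */
        for (pc = 0; pc < flen; pc++)
                if (BPF_CLASS(filter[pc].code) == BPF_LD &&
                    BPF_MODE(filter[pc].code) == BPF_ABS)
                        return -EINVAL;
        return 0;
}

/* fprog names a user-supplied filter; prog comes back ready to run */
err = bpf_prog_create_from_user(&prog, &fprog, my_extra_check);
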
@@ -1135,7 +1199,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1135 /* bpf_prepare_filter() already takes care of freeing 1199 /* bpf_prepare_filter() already takes care of freeing
1136 * memory in case something goes wrong. 1200 * memory in case something goes wrong.
1137 */ 1201 */
1138 prog = bpf_prepare_filter(prog); 1202 prog = bpf_prepare_filter(prog, NULL);
1139 if (IS_ERR(prog)) 1203 if (IS_ERR(prog))
1140 return PTR_ERR(prog); 1204 return PTR_ERR(prog);
1141 1205
@@ -1175,21 +1239,6 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
1175 return 0; 1239 return 0;
1176} 1240}
1177 1241
1178/**
1179 * bpf_skb_clone_not_writable - is the header of a clone not writable
1180 * @skb: buffer to check
1181 * @len: length up to which to write, can be negative
1182 *
1183 * Returns true if modifying the header part of the cloned buffer
1184 * does require the data to be copied. I.e. this version works with
1185 * negative lengths needed for eBPF case!
1186 */
1187static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
1188{
1189 return skb_header_cloned(skb) ||
1190 (int) skb_headroom(skb) + len > skb->hdr_len;
1191}
1192
1193#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) 1242#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
1194 1243
1195static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) 1244static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
@@ -1212,9 +1261,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1212 if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) 1261 if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
1213 return -EFAULT; 1262 return -EFAULT;
1214 1263
1215 offset -= skb->data - skb_mac_header(skb);
1216 if (unlikely(skb_cloned(skb) && 1264 if (unlikely(skb_cloned(skb) &&
1217 bpf_skb_clone_unwritable(skb, offset + len))) 1265 !skb_clone_writable(skb, offset + len)))
1218 return -EFAULT; 1266 return -EFAULT;
1219 1267
1220 ptr = skb_header_pointer(skb, offset, len, buf); 1268 ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1258,9 +1306,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1258 if (unlikely((u32) offset > 0xffff)) 1306 if (unlikely((u32) offset > 0xffff))
1259 return -EFAULT; 1307 return -EFAULT;
1260 1308
1261 offset -= skb->data - skb_mac_header(skb);
1262 if (unlikely(skb_cloned(skb) && 1309 if (unlikely(skb_cloned(skb) &&
1263 bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) 1310 !skb_clone_writable(skb, offset + sizeof(sum))))
1264 return -EFAULT; 1311 return -EFAULT;
1265 1312
1266 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1313 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1306,9 +1353,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1306 if (unlikely((u32) offset > 0xffff)) 1353 if (unlikely((u32) offset > 0xffff))
1307 return -EFAULT; 1354 return -EFAULT;
1308 1355
1309 offset -= skb->data - skb_mac_header(skb);
1310 if (unlikely(skb_cloned(skb) && 1356 if (unlikely(skb_cloned(skb) &&
1311 bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) 1357 !skb_clone_writable(skb, offset + sizeof(sum))))
1312 return -EFAULT; 1358 return -EFAULT;
1313 1359
1314 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1360 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1344,6 +1390,40 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1344 .arg5_type = ARG_ANYTHING, 1390 .arg5_type = ARG_ANYTHING,
1345}; 1391};
1346 1392
1393#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1)
1394
1395static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1396{
1397 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
1398 struct net_device *dev;
1399
1400 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
1401 if (unlikely(!dev))
1402 return -EINVAL;
1403
1404 if (unlikely(!(dev->flags & IFF_UP)))
1405 return -EINVAL;
1406
1407 skb2 = skb_clone(skb, GFP_ATOMIC);
1408 if (unlikely(!skb2))
1409 return -ENOMEM;
1410
1411 if (BPF_IS_REDIRECT_INGRESS(flags))
1412 return dev_forward_skb(dev, skb2);
1413
1414 skb2->dev = dev;
1415 return dev_queue_xmit(skb2);
1416}
1417
1418const struct bpf_func_proto bpf_clone_redirect_proto = {
1419 .func = bpf_clone_redirect,
1420 .gpl_only = false,
1421 .ret_type = RET_INTEGER,
1422 .arg1_type = ARG_PTR_TO_CTX,
1423 .arg2_type = ARG_ANYTHING,
1424 .arg3_type = ARG_ANYTHING,
1425};
1426
1347static const struct bpf_func_proto * 1427static const struct bpf_func_proto *
1348sk_filter_func_proto(enum bpf_func_id func_id) 1428sk_filter_func_proto(enum bpf_func_id func_id)
1349{ 1429{
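
From the program's side the new helper takes the skb context, a target ifindex and flags, with bit 0 selecting ingress-side injection per the BPF_IS_REDIRECT_INGRESS() test above. A hedged tc-eBPF sketch in restricted C (samples/bpf-style helper declaration; section name and ifindex are illustrative):

#include <linux/bpf.h>

static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
        (void *) BPF_FUNC_clone_redirect;

#define TARGET_IFINDEX 2                 /* illustrative */

__attribute__((section("action"), used))
int mirror(struct __sk_buff *skb)
{
        /* clone the packet, queue the clone on the target's egress */
        bpf_clone_redirect(skb, TARGET_IFINDEX, 0);
        return 0;                        /* TC_ACT_OK for the original */
}
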
@@ -1358,6 +1438,12 @@ sk_filter_func_proto(enum bpf_func_id func_id)
1358 return &bpf_get_prandom_u32_proto; 1438 return &bpf_get_prandom_u32_proto;
1359 case BPF_FUNC_get_smp_processor_id: 1439 case BPF_FUNC_get_smp_processor_id:
1360 return &bpf_get_smp_processor_id_proto; 1440 return &bpf_get_smp_processor_id_proto;
1441 case BPF_FUNC_tail_call:
1442 return &bpf_tail_call_proto;
1443 case BPF_FUNC_ktime_get_ns:
1444 return &bpf_ktime_get_ns_proto;
1445 case BPF_FUNC_trace_printk:
1446 return bpf_get_trace_printk_proto();
1361 default: 1447 default:
1362 return NULL; 1448 return NULL;
1363 } 1449 }
@@ -1373,18 +1459,15 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1373 return &bpf_l3_csum_replace_proto; 1459 return &bpf_l3_csum_replace_proto;
1374 case BPF_FUNC_l4_csum_replace: 1460 case BPF_FUNC_l4_csum_replace:
1375 return &bpf_l4_csum_replace_proto; 1461 return &bpf_l4_csum_replace_proto;
1462 case BPF_FUNC_clone_redirect:
1463 return &bpf_clone_redirect_proto;
1376 default: 1464 default:
1377 return sk_filter_func_proto(func_id); 1465 return sk_filter_func_proto(func_id);
1378 } 1466 }
1379} 1467}
1380 1468
1381static bool sk_filter_is_valid_access(int off, int size, 1469static bool __is_valid_access(int off, int size, enum bpf_access_type type)
1382 enum bpf_access_type type)
1383{ 1470{
1384 /* only read is allowed */
1385 if (type != BPF_READ)
1386 return false;
1387
1388 /* check bounds */ 1471 /* check bounds */
1389 if (off < 0 || off >= sizeof(struct __sk_buff)) 1472 if (off < 0 || off >= sizeof(struct __sk_buff))
1390 return false; 1473 return false;
@@ -1400,8 +1483,42 @@ static bool sk_filter_is_valid_access(int off, int size,
1400 return true; 1483 return true;
1401} 1484}
1402 1485
1403static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, 1486static bool sk_filter_is_valid_access(int off, int size,
1404 struct bpf_insn *insn_buf) 1487 enum bpf_access_type type)
1488{
1489 if (type == BPF_WRITE) {
1490 switch (off) {
1491 case offsetof(struct __sk_buff, cb[0]) ...
1492 offsetof(struct __sk_buff, cb[4]):
1493 break;
1494 default:
1495 return false;
1496 }
1497 }
1498
1499 return __is_valid_access(off, size, type);
1500}
1501
1502static bool tc_cls_act_is_valid_access(int off, int size,
1503 enum bpf_access_type type)
1504{
1505 if (type == BPF_WRITE) {
1506 switch (off) {
1507 case offsetof(struct __sk_buff, mark):
1508 case offsetof(struct __sk_buff, tc_index):
1509 case offsetof(struct __sk_buff, cb[0]) ...
1510 offsetof(struct __sk_buff, cb[4]):
1511 break;
1512 default:
1513 return false;
1514 }
1515 }
1516 return __is_valid_access(off, size, type);
1517}
1518
1519static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
1520 int src_reg, int ctx_off,
1521 struct bpf_insn *insn_buf)
1405{ 1522{
1406 struct bpf_insn *insn = insn_buf; 1523 struct bpf_insn *insn = insn_buf;
1407 1524
@@ -1434,8 +1551,34 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
1434 offsetof(struct sk_buff, priority)); 1551 offsetof(struct sk_buff, priority));
1435 break; 1552 break;
1436 1553
1554 case offsetof(struct __sk_buff, ingress_ifindex):
1555 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
1556
1557 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
1558 offsetof(struct sk_buff, skb_iif));
1559 break;
1560
1561 case offsetof(struct __sk_buff, ifindex):
1562 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
1563
1564 *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
1565 dst_reg, src_reg,
1566 offsetof(struct sk_buff, dev));
1567 *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
1568 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
1569 offsetof(struct net_device, ifindex));
1570 break;
1571
1437 case offsetof(struct __sk_buff, mark): 1572 case offsetof(struct __sk_buff, mark):
1438 return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); 1573 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
1574
1575 if (type == BPF_WRITE)
1576 *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
1577 offsetof(struct sk_buff, mark));
1578 else
1579 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
1580 offsetof(struct sk_buff, mark));
1581 break;
1439 1582
1440 case offsetof(struct __sk_buff, pkt_type): 1583 case offsetof(struct __sk_buff, pkt_type):
1441 return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); 1584 return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1450,6 +1593,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
1450 case offsetof(struct __sk_buff, vlan_tci): 1593 case offsetof(struct __sk_buff, vlan_tci):
1451 return convert_skb_access(SKF_AD_VLAN_TAG, 1594 return convert_skb_access(SKF_AD_VLAN_TAG,
1452 dst_reg, src_reg, insn); 1595 dst_reg, src_reg, insn);
1596
1597 case offsetof(struct __sk_buff, cb[0]) ...
1598 offsetof(struct __sk_buff, cb[4]):
1599 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
1600
1601 ctx_off -= offsetof(struct __sk_buff, cb[0]);
1602 ctx_off += offsetof(struct sk_buff, cb);
1603 ctx_off += offsetof(struct qdisc_skb_cb, data);
1604 if (type == BPF_WRITE)
1605 *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
1606 else
1607 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
1608 break;
1609
1610 case offsetof(struct __sk_buff, tc_index):
1611#ifdef CONFIG_NET_SCHED
1612 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
1613
1614 if (type == BPF_WRITE)
1615 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
1616 offsetof(struct sk_buff, tc_index));
1617 else
1618 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
1619 offsetof(struct sk_buff, tc_index));
1620 break;
1621#else
1622 if (type == BPF_WRITE)
1623 *insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
1624 else
1625 *insn++ = BPF_MOV64_IMM(dst_reg, 0);
1626 break;
1627#endif
1453 } 1628 }
1454 1629
1455 return insn - insn_buf; 1630 return insn - insn_buf;
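
The split access checks above boil down to: socket filters stay read-only except for the 20-byte cb[] scratch area, while tc classifier/action programs may additionally write mark and tc_index, and bpf_net_convert_ctx_access() now emits BPF_STX_MEM stores for those offsets. Illustratively, a tc program may now do:

int classify(struct __sk_buff *skb)
{
        skb->cb[0]    = 0xcafe;  /* scratch; lands in qdisc_skb_cb->data */
        skb->mark     = 42;      /* tc only; rejected for socket filters */
        skb->tc_index = 1;
        return 0;
}
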
@@ -1458,13 +1633,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
1458static const struct bpf_verifier_ops sk_filter_ops = { 1633static const struct bpf_verifier_ops sk_filter_ops = {
1459 .get_func_proto = sk_filter_func_proto, 1634 .get_func_proto = sk_filter_func_proto,
1460 .is_valid_access = sk_filter_is_valid_access, 1635 .is_valid_access = sk_filter_is_valid_access,
1461 .convert_ctx_access = sk_filter_convert_ctx_access, 1636 .convert_ctx_access = bpf_net_convert_ctx_access,
1462}; 1637};
1463 1638
1464static const struct bpf_verifier_ops tc_cls_act_ops = { 1639static const struct bpf_verifier_ops tc_cls_act_ops = {
1465 .get_func_proto = tc_cls_act_func_proto, 1640 .get_func_proto = tc_cls_act_func_proto,
1466 .is_valid_access = sk_filter_is_valid_access, 1641 .is_valid_access = tc_cls_act_is_valid_access,
1467 .convert_ctx_access = sk_filter_convert_ctx_access, 1642 .convert_ctx_access = bpf_net_convert_ctx_access,
1468}; 1643};
1469 1644
1470static struct bpf_prog_type_list sk_filter_type __read_mostly = { 1645static struct bpf_prog_type_list sk_filter_type __read_mostly = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2c35c02a931e..2a834c6179b9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
1#include <linux/kernel.h>
1#include <linux/skbuff.h> 2#include <linux/skbuff.h>
2#include <linux/export.h> 3#include <linux/export.h>
3#include <linux/ip.h> 4#include <linux/ip.h>
@@ -12,19 +13,60 @@
12#include <linux/if_tunnel.h> 13#include <linux/if_tunnel.h>
13#include <linux/if_pppox.h> 14#include <linux/if_pppox.h>
14#include <linux/ppp_defs.h> 15#include <linux/ppp_defs.h>
15#include <net/flow_keys.h> 16#include <linux/stddef.h>
17#include <linux/if_ether.h>
18#include <linux/mpls.h>
19#include <net/flow_dissector.h>
16#include <scsi/fc/fc_fcoe.h> 20#include <scsi/fc/fc_fcoe.h>
17 21
18/* copy saddr & daddr, possibly using 64bit load/store 22static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
19 * Equivalent to : flow->src = iph->saddr; 23 enum flow_dissector_key_id key_id)
20 * flow->dst = iph->daddr; 24{
21 */ 25 return flow_dissector->used_keys & (1 << key_id);
22static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) 26}
27
28static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
29 enum flow_dissector_key_id key_id)
30{
31 flow_dissector->used_keys |= (1 << key_id);
32}
33
34static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
35 enum flow_dissector_key_id key_id,
36 void *target_container)
37{
38 return ((char *) target_container) + flow_dissector->offset[key_id];
39}
40
41void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
42 const struct flow_dissector_key *key,
43 unsigned int key_count)
23{ 44{
24 BUILD_BUG_ON(offsetof(typeof(*flow), dst) != 45 unsigned int i;
25 offsetof(typeof(*flow), src) + sizeof(flow->src)); 46
26 memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); 47 memset(flow_dissector, 0, sizeof(*flow_dissector));
48
49 for (i = 0; i < key_count; i++, key++) {
50 /* User should make sure that every key target offset is within
51 * the boundaries of an unsigned short.
52 */
53 BUG_ON(key->offset > USHRT_MAX);
54 BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
55 key->key_id));
56
57 skb_flow_dissector_set_key(flow_dissector, key->key_id);
58 flow_dissector->offset[key->key_id] = key->offset;
59 }
60
61 /* Ensure that the dissector always includes the control and basic keys.
62 * That way we are able to avoid handling lack of these in fast path.
63 */
64 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
65 FLOW_DISSECTOR_KEY_CONTROL));
66 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
67 FLOW_DISSECTOR_KEY_BASIC));
27} 68}
69EXPORT_SYMBOL(skb_flow_dissector_init);
28 70
29/** 71/**
30 * __skb_flow_get_ports - extract the upper layer ports and return them 72 * __skb_flow_get_ports - extract the upper layer ports and return them
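
Users of the reworked dissector describe, once, which keys they care about and where each one lands inside their own container struct; __skb_flow_dissect() then fills in only those. A hedged sketch of a caller (names illustrative; the control and basic keys are mandatory, as the BUG_ONs above enforce):

struct my_flow {
        struct flow_dissector_key_control    control;
        struct flow_dissector_key_basic      basic;
        struct flow_dissector_key_ipv4_addrs v4;
};

static const struct flow_dissector_key my_keys[] = {
        { .key_id = FLOW_DISSECTOR_KEY_CONTROL,
          .offset = offsetof(struct my_flow, control) },
        { .key_id = FLOW_DISSECTOR_KEY_BASIC,
          .offset = offsetof(struct my_flow, basic) },
        { .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
          .offset = offsetof(struct my_flow, v4) },
};

static struct flow_dissector my_dissector;

/* one-time setup; dissect calls then pass &my_dissector and a
 * zeroed struct my_flow as the target container */
skb_flow_dissector_init(&my_dissector, my_keys, ARRAY_SIZE(my_keys));
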
@@ -63,18 +105,31 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
63/** 105/**
64 * __skb_flow_dissect - extract the flow_keys struct and return it 106 * __skb_flow_dissect - extract the flow_keys struct and return it
65 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified 107 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
108 * @flow_dissector: list of keys to dissect
109 * @target_container: target structure to put dissected values into
66 * @data: raw buffer pointer to the packet, if NULL use skb->data 110 * @data: raw buffer pointer to the packet, if NULL use skb->data
67 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol 111 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
68 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) 112 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
69 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 113 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
70 * 114 *
71 * The function will try to retrieve the struct flow_keys from either the skbuff 115 * The function will try to retrieve individual keys into target specified
72 * or a raw buffer specified by the rest parameters 116 * by flow_dissector from either the skbuff or a raw buffer specified by the
117 * rest parameters.
118 *
119 * Caller must take care of zeroing target container memory.
73 */ 120 */
74bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, 121bool __skb_flow_dissect(const struct sk_buff *skb,
122 struct flow_dissector *flow_dissector,
123 void *target_container,
75 void *data, __be16 proto, int nhoff, int hlen) 124 void *data, __be16 proto, int nhoff, int hlen)
76{ 125{
77 u8 ip_proto; 126 struct flow_dissector_key_control *key_control;
127 struct flow_dissector_key_basic *key_basic;
128 struct flow_dissector_key_addrs *key_addrs;
129 struct flow_dissector_key_ports *key_ports;
130 struct flow_dissector_key_tags *key_tags;
131 struct flow_dissector_key_keyid *key_keyid;
132 u8 ip_proto = 0;
78 133
79 if (!data) { 134 if (!data) {
80 data = skb->data; 135 data = skb->data;
@@ -83,7 +138,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
83 hlen = skb_headlen(skb); 138 hlen = skb_headlen(skb);
84 } 139 }
85 140
86 memset(flow, 0, sizeof(*flow)); 141 /* It is ensured by skb_flow_dissector_init() that control key will
	142	 * always be present.
143 */
144 key_control = skb_flow_dissector_target(flow_dissector,
145 FLOW_DISSECTOR_KEY_CONTROL,
146 target_container);
147
148 /* It is ensured by skb_flow_dissector_init() that basic key will
	149	 * always be present.
150 */
151 key_basic = skb_flow_dissector_target(flow_dissector,
152 FLOW_DISSECTOR_KEY_BASIC,
153 target_container);
154
155 if (skb_flow_dissector_uses_key(flow_dissector,
156 FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
157 struct ethhdr *eth = eth_hdr(skb);
158 struct flow_dissector_key_eth_addrs *key_eth_addrs;
159
160 key_eth_addrs = skb_flow_dissector_target(flow_dissector,
161 FLOW_DISSECTOR_KEY_ETH_ADDRS,
162 target_container);
163 memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
164 }
87 165
88again: 166again:
89 switch (proto) { 167 switch (proto) {
@@ -100,14 +178,15 @@ ip:
100 if (ip_is_fragment(iph)) 178 if (ip_is_fragment(iph))
101 ip_proto = 0; 179 ip_proto = 0;
102 180
103 /* skip the address processing if skb is NULL. The assumption 181 if (!skb_flow_dissector_uses_key(flow_dissector,
104 * here is that if there is no skb we are not looking for flow 182 FLOW_DISSECTOR_KEY_IPV4_ADDRS))
105 * info but lengths and protocols.
106 */
107 if (!skb)
108 break; 183 break;
109 184
110 iph_to_flow_copy_addrs(flow, iph); 185 key_addrs = skb_flow_dissector_target(flow_dissector,
186 FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container);
187 memcpy(&key_addrs->v4addrs, &iph->saddr,
188 sizeof(key_addrs->v4addrs));
189 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
111 break; 190 break;
112 } 191 }
113 case htons(ETH_P_IPV6): { 192 case htons(ETH_P_IPV6): {
@@ -123,25 +202,27 @@ ipv6:
123 ip_proto = iph->nexthdr; 202 ip_proto = iph->nexthdr;
124 nhoff += sizeof(struct ipv6hdr); 203 nhoff += sizeof(struct ipv6hdr);
125 204
126 /* see comment above in IPv4 section */ 205 if (skb_flow_dissector_uses_key(flow_dissector,
127 if (!skb) 206 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
128 break; 207 struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
208
209 key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
210 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
211 target_container);
129 212
130 flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); 213 memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
131 flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); 214 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
215 }
132 216
133 flow_label = ip6_flowlabel(iph); 217 flow_label = ip6_flowlabel(iph);
134 if (flow_label) { 218 if (flow_label) {
135 /* Awesome, IPv6 packet has a flow label so we can 219 if (skb_flow_dissector_uses_key(flow_dissector,
136 * use that to represent the ports without any 220 FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
137 * further dissection. 221 key_tags = skb_flow_dissector_target(flow_dissector,
138 */ 222 FLOW_DISSECTOR_KEY_FLOW_LABEL,
139 flow->n_proto = proto; 223 target_container);
140 flow->ip_proto = ip_proto; 224 key_tags->flow_label = ntohl(flow_label);
141 flow->ports = flow_label; 225 }
142 flow->thoff = (u16)nhoff;
143
144 return true;
145 } 226 }
146 227
147 break; 228 break;
@@ -155,6 +236,15 @@ ipv6:
155 if (!vlan) 236 if (!vlan)
156 return false; 237 return false;
157 238
239 if (skb_flow_dissector_uses_key(flow_dissector,
240 FLOW_DISSECTOR_KEY_VLANID)) {
241 key_tags = skb_flow_dissector_target(flow_dissector,
242 FLOW_DISSECTOR_KEY_VLANID,
243 target_container);
244
245 key_tags->vlan_id = skb_vlan_tag_get_id(skb);
246 }
247
158 proto = vlan->h_vlan_encapsulated_proto; 248 proto = vlan->h_vlan_encapsulated_proto;
159 nhoff += sizeof(*vlan); 249 nhoff += sizeof(*vlan);
160 goto again; 250 goto again;
@@ -186,19 +276,58 @@ ipv6:
186 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); 276 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
187 if (!hdr) 277 if (!hdr)
188 return false; 278 return false;
189 flow->src = hdr->srcnode; 279 key_basic->n_proto = proto;
190 flow->dst = 0; 280 key_control->thoff = (u16)nhoff;
191 flow->n_proto = proto; 281
192 flow->thoff = (u16)nhoff; 282 if (skb_flow_dissector_uses_key(flow_dissector,
283 FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
284 key_addrs = skb_flow_dissector_target(flow_dissector,
285 FLOW_DISSECTOR_KEY_TIPC_ADDRS,
286 target_container);
287 key_addrs->tipcaddrs.srcnode = hdr->srcnode;
288 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
289 }
290 return true;
291 }
292
293 case htons(ETH_P_MPLS_UC):
294 case htons(ETH_P_MPLS_MC): {
295 struct mpls_label *hdr, _hdr[2];
296mpls:
297 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
298 hlen, &_hdr);
299 if (!hdr)
300 return false;
301
302 if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
303 MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
304 if (skb_flow_dissector_uses_key(flow_dissector,
305 FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
306 key_keyid = skb_flow_dissector_target(flow_dissector,
307 FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
308 target_container);
309 key_keyid->keyid = hdr[1].entry &
310 htonl(MPLS_LS_LABEL_MASK);
311 }
312
313 key_basic->n_proto = proto;
314 key_basic->ip_proto = ip_proto;
315 key_control->thoff = (u16)nhoff;
316
317 return true;
318 }
319
193 return true; 320 return true;
194 } 321 }
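Worked example for the entropy-label branch above: an LSE whose label field, (ntohl(entry) & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT, equals MPLS_LABEL_ENTROPY (7) announces that the following LSE carries the entropy value, so hdr[1].entry & htonl(MPLS_LS_LABEL_MASK) keeps just the 20 label bits of that second entry, still in network byte order, as the flow keyid.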
322
195 case htons(ETH_P_FCOE): 323 case htons(ETH_P_FCOE):
196 flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); 324 key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
197 /* fall through */ 325 /* fall through */
198 default: 326 default:
199 return false; 327 return false;
200 } 328 }
201 329
330ip_proto_again:
202 switch (ip_proto) { 331 switch (ip_proto) {
203 case IPPROTO_GRE: { 332 case IPPROTO_GRE: {
204 struct gre_hdr { 333 struct gre_hdr {
@@ -213,30 +342,65 @@ ipv6:
213 * Only look inside GRE if version zero and no 342 * Only look inside GRE if version zero and no
214 * routing 343 * routing
215 */ 344 */
216 if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { 345 if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
217 proto = hdr->proto; 346 break;
347
348 proto = hdr->proto;
349 nhoff += 4;
350 if (hdr->flags & GRE_CSUM)
218 nhoff += 4; 351 nhoff += 4;
219 if (hdr->flags & GRE_CSUM) 352 if (hdr->flags & GRE_KEY) {
220 nhoff += 4; 353 const __be32 *keyid;
221 if (hdr->flags & GRE_KEY) 354 __be32 _keyid;
222 nhoff += 4; 355
223 if (hdr->flags & GRE_SEQ) 356 keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
224 nhoff += 4; 357 data, hlen, &_keyid);
225 if (proto == htons(ETH_P_TEB)) { 358
226 const struct ethhdr *eth; 359 if (!keyid)
227 struct ethhdr _eth; 360 return false;
228 361
229 eth = __skb_header_pointer(skb, nhoff, 362 if (skb_flow_dissector_uses_key(flow_dissector,
230 sizeof(_eth), 363 FLOW_DISSECTOR_KEY_GRE_KEYID)) {
231 data, hlen, &_eth); 364 key_keyid = skb_flow_dissector_target(flow_dissector,
232 if (!eth) 365 FLOW_DISSECTOR_KEY_GRE_KEYID,
233 return false; 366 target_container);
234 proto = eth->h_proto; 367 key_keyid->keyid = *keyid;
235 nhoff += sizeof(*eth);
236 } 368 }
237 goto again; 369 nhoff += 4;
238 } 370 }
239 break; 371 if (hdr->flags & GRE_SEQ)
372 nhoff += 4;
373 if (proto == htons(ETH_P_TEB)) {
374 const struct ethhdr *eth;
375 struct ethhdr _eth;
376
377 eth = __skb_header_pointer(skb, nhoff,
378 sizeof(_eth),
379 data, hlen, &_eth);
380 if (!eth)
381 return false;
382 proto = eth->h_proto;
383 nhoff += sizeof(*eth);
384 }
385 goto again;
386 }
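Worked example for the GRE offsets above: the base header is 4 bytes, and each optional checksum, key and sequence field adds another 4. With GRE_CSUM|GRE_KEY set, the 32-bit key is therefore read at the original nhoff + 8 (just past the checksum word) and the encapsulated protocol starts at nhoff + 12; GRE_SEQ on top of that pushes the payload to nhoff + 16.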
387 case NEXTHDR_HOP:
388 case NEXTHDR_ROUTING:
389 case NEXTHDR_DEST: {
390 u8 _opthdr[2], *opthdr;
391
392 if (proto != htons(ETH_P_IPV6))
393 break;
394
395 opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
396 data, hlen, &_opthdr);
397 if (!opthdr)
398 return false;
399
400 ip_proto = opthdr[0];
401 nhoff += (opthdr[1] + 1) << 3;
402
403 goto ip_proto_again;
240 } 404 }
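Worked example for the extension-header walk above: opthdr[1] is the Hdr Ext Len field, counted in 8-octet units excluding the first 8 octets, so a Hop-by-Hop header with Hdr Ext Len = 1 spans (1 + 1) << 3 = 16 bytes, and nhoff advances by 16 before re-dispatching on the new ip_proto via ip_proto_again.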
241 case IPPROTO_IPIP: 405 case IPPROTO_IPIP:
242 proto = htons(ETH_P_IP); 406 proto = htons(ETH_P_IP);
@@ -244,18 +408,25 @@ ipv6:
244 case IPPROTO_IPV6: 408 case IPPROTO_IPV6:
245 proto = htons(ETH_P_IPV6); 409 proto = htons(ETH_P_IPV6);
246 goto ipv6; 410 goto ipv6;
411 case IPPROTO_MPLS:
412 proto = htons(ETH_P_MPLS_UC);
413 goto mpls;
247 default: 414 default:
248 break; 415 break;
249 } 416 }
250 417
251 flow->n_proto = proto; 418 key_basic->n_proto = proto;
252 flow->ip_proto = ip_proto; 419 key_basic->ip_proto = ip_proto;
253 flow->thoff = (u16) nhoff; 420 key_control->thoff = (u16)nhoff;
254 421
255 /* unless skb is set we don't need to record port info */ 422 if (skb_flow_dissector_uses_key(flow_dissector,
256 if (skb) 423 FLOW_DISSECTOR_KEY_PORTS)) {
257 flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, 424 key_ports = skb_flow_dissector_target(flow_dissector,
258 data, hlen); 425 FLOW_DISSECTOR_KEY_PORTS,
426 target_container);
427 key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
428 data, hlen);
429 }
259 430
260 return true; 431 return true;
261} 432}
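Taken together, a hedged usage sketch of the reworked entry point (assuming the default flow_keys_dissector defined near the end of this file; the wrapper name is hypothetical). Passing NULL/0 for the raw-buffer parameters makes the dissector read straight from the skb, and since the dissector no longer clears the target, the caller zeroes it first:

	static bool my_dissect_skb(const struct sk_buff *skb,
				   struct flow_keys *keys)
	{
		/* Callers are now responsible for zeroing the container. */
		memset(keys, 0, sizeof(*keys));

		/* NULL/0 for data, proto, nhoff, hlen: take them from skb. */
		return __skb_flow_dissect(skb, &flow_keys_dissector, keys,
					  NULL, 0, 0, 0);
	}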
@@ -267,27 +438,109 @@ static __always_inline void __flow_hash_secret_init(void)
267 net_get_random_once(&hashrnd, sizeof(hashrnd)); 438 net_get_random_once(&hashrnd, sizeof(hashrnd));
268} 439}
269 440
270static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) 441static __always_inline u32 __flow_hash_words(u32 *words, u32 length, u32 keyval)
271{ 442{
272 __flow_hash_secret_init(); 443 return jhash2(words, length, keyval);
273 return jhash_3words(a, b, c, hashrnd);
274} 444}
275 445
276static inline u32 __flow_hash_from_keys(struct flow_keys *keys) 446static inline void *flow_keys_hash_start(struct flow_keys *flow)
277{ 447{
278 u32 hash; 448 BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
449 return (void *)flow + FLOW_KEYS_HASH_OFFSET;
450}
451
452static inline size_t flow_keys_hash_length(struct flow_keys *flow)
453{
454 size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
455 BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
456 BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
457 sizeof(*flow) - sizeof(flow->addrs));
458
459 switch (flow->control.addr_type) {
460 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
461 diff -= sizeof(flow->addrs.v4addrs);
462 break;
463 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
464 diff -= sizeof(flow->addrs.v6addrs);
465 break;
466 case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
467 diff -= sizeof(flow->addrs.tipcaddrs);
468 break;
469 }
470 return (sizeof(*flow) - diff) / sizeof(u32);
471}
472
473__be32 flow_get_u32_src(const struct flow_keys *flow)
474{
475 switch (flow->control.addr_type) {
476 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
477 return flow->addrs.v4addrs.src;
478 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
479 return (__force __be32)ipv6_addr_hash(
480 &flow->addrs.v6addrs.src);
481 case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
482 return flow->addrs.tipcaddrs.srcnode;
483 default:
484 return 0;
485 }
486}
487EXPORT_SYMBOL(flow_get_u32_src);
279 488
280 /* get a consistent hash (same value on both flow directions) */ 489__be32 flow_get_u32_dst(const struct flow_keys *flow)
281 if (((__force u32)keys->dst < (__force u32)keys->src) || 490{
282 (((__force u32)keys->dst == (__force u32)keys->src) && 491 switch (flow->control.addr_type) {
283 ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { 492 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
284 swap(keys->dst, keys->src); 493 return flow->addrs.v4addrs.dst;
285 swap(keys->port16[0], keys->port16[1]); 494 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
495 return (__force __be32)ipv6_addr_hash(
496 &flow->addrs.v6addrs.dst);
497 default:
498 return 0;
286 } 499 }
500}
501EXPORT_SYMBOL(flow_get_u32_dst);
287 502
288 hash = __flow_hash_3words((__force u32)keys->dst, 503static inline void __flow_hash_consistentify(struct flow_keys *keys)
289 (__force u32)keys->src, 504{
290 (__force u32)keys->ports); 505 int addr_diff, i;
506
507 switch (keys->control.addr_type) {
508 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
509 addr_diff = (__force u32)keys->addrs.v4addrs.dst -
510 (__force u32)keys->addrs.v4addrs.src;
511 if ((addr_diff < 0) ||
512 (addr_diff == 0 &&
513 ((__force u16)keys->ports.dst <
514 (__force u16)keys->ports.src))) {
515 swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
516 swap(keys->ports.src, keys->ports.dst);
517 }
518 break;
519 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
520 addr_diff = memcmp(&keys->addrs.v6addrs.dst,
521 &keys->addrs.v6addrs.src,
522 sizeof(keys->addrs.v6addrs.dst));
523 if ((addr_diff < 0) ||
524 (addr_diff == 0 &&
525 ((__force u16)keys->ports.dst <
526 (__force u16)keys->ports.src))) {
527 for (i = 0; i < 4; i++)
528 swap(keys->addrs.v6addrs.src.s6_addr32[i],
529 keys->addrs.v6addrs.dst.s6_addr32[i]);
530 swap(keys->ports.src, keys->ports.dst);
531 }
532 break;
533 }
534}
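The net effect of __flow_hash_consistentify() is direction independence: for a flow 10.0.0.1:1000 -> 10.0.0.2:2000 the addresses are already canonical (dst > src, no swap), while the reply 10.0.0.2:2000 -> 10.0.0.1:1000 has dst < src and is swapped into the same (src=10.0.0.1, dst=10.0.0.2, sport=1000, dport=2000) layout, so both directions of a connection hash to the same value. IPv4 compares the 32-bit addresses by subtraction; IPv6 uses memcmp() and swaps word by word.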
535
536static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
537{
538 u32 hash;
539
540 __flow_hash_consistentify(keys);
541
542 hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys),
543 flow_keys_hash_length(keys), keyval);
291 if (!hash) 544 if (!hash)
292 hash = 1; 545 hash = 1;
293 546
@@ -296,12 +549,52 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
296 549
297u32 flow_hash_from_keys(struct flow_keys *keys) 550u32 flow_hash_from_keys(struct flow_keys *keys)
298{ 551{
299 return __flow_hash_from_keys(keys); 552 __flow_hash_secret_init();
553 return __flow_hash_from_keys(keys, hashrnd);
300} 554}
301EXPORT_SYMBOL(flow_hash_from_keys); 555EXPORT_SYMBOL(flow_hash_from_keys);
302 556
303/* 557static inline u32 ___skb_get_hash(const struct sk_buff *skb,
304 * __skb_get_hash: calculate a flow hash based on src/dst addresses 558 struct flow_keys *keys, u32 keyval)
559{
560 if (!skb_flow_dissect_flow_keys(skb, keys))
561 return 0;
562
563 return __flow_hash_from_keys(keys, keyval);
564}
565
566struct _flow_keys_digest_data {
567 __be16 n_proto;
568 u8 ip_proto;
569 u8 padding;
570 __be32 ports;
571 __be32 src;
572 __be32 dst;
573};
574
575void make_flow_keys_digest(struct flow_keys_digest *digest,
576 const struct flow_keys *flow)
577{
578 struct _flow_keys_digest_data *data =
579 (struct _flow_keys_digest_data *)digest;
580
581 BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));
582
583 memset(digest, 0, sizeof(*digest));
584
585 data->n_proto = flow->basic.n_proto;
586 data->ip_proto = flow->basic.ip_proto;
587 data->ports = flow->ports.ports;
588 data->src = flow->addrs.v4addrs.src;
589 data->dst = flow->addrs.v4addrs.dst;
590}
591EXPORT_SYMBOL(make_flow_keys_digest);
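A hedged usage sketch for the digest (the helper name is hypothetical; skb_flow_dissect_flow_keys() appears elsewhere in this diff): dissect each packet, fold the keys into the fixed-size digest, and compare digests cheaply with memcmp(). Note the digest folds in the v4addrs union fields, so for non-IPv4 flows only the leading bytes of the larger address are captured:

	static bool my_same_flow(const struct sk_buff *a,
				 const struct sk_buff *b)
	{
		struct flow_keys keys;
		struct flow_keys_digest da, db;

		memset(&keys, 0, sizeof(keys));
		if (!skb_flow_dissect_flow_keys(a, &keys))
			return false;
		make_flow_keys_digest(&da, &keys);

		memset(&keys, 0, sizeof(keys));
		if (!skb_flow_dissect_flow_keys(b, &keys))
			return false;
		make_flow_keys_digest(&db, &keys);

		/* make_flow_keys_digest() memsets the whole digest, so a
		 * byte-wise compare is well defined.
		 */
		return !memcmp(&da, &db, sizeof(da));
	}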
592
593/**
594 * __skb_get_hash: calculate a flow hash
595 * @skb: sk_buff to calculate flow hash from
596 *
597 * This function calculates a flow hash based on src/dst addresses
305 * and src/dst port numbers. Sets hash in skb to non-zero hash value 598 * and src/dst port numbers. Sets hash in skb to non-zero hash value
306 * on success, zero indicates no valid hash. Also, sets l4_hash in skb 599 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
307 * if hash is a canonical 4-tuple hash over transport ports. 600 * if hash is a canonical 4-tuple hash over transport ports.
@@ -309,53 +602,34 @@ EXPORT_SYMBOL(flow_hash_from_keys);
309void __skb_get_hash(struct sk_buff *skb) 602void __skb_get_hash(struct sk_buff *skb)
310{ 603{
311 struct flow_keys keys; 604 struct flow_keys keys;
605 u32 hash;
312 606
313 if (!skb_flow_dissect(skb, &keys)) 607 __flow_hash_secret_init();
314 return;
315 608
316 if (keys.ports) 609 hash = ___skb_get_hash(skb, &keys, hashrnd);
610 if (!hash)
611 return;
612 if (keys.ports.ports)
317 skb->l4_hash = 1; 613 skb->l4_hash = 1;
318
319 skb->sw_hash = 1; 614 skb->sw_hash = 1;
320 615 skb->hash = hash;
321 skb->hash = __flow_hash_from_keys(&keys);
322} 616}
323EXPORT_SYMBOL(__skb_get_hash); 617EXPORT_SYMBOL(__skb_get_hash);
324 618
325/* 619__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
326 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
327 * to be used as a distribution range.
328 */
329u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
330 unsigned int num_tx_queues)
331{ 620{
332 u32 hash; 621 struct flow_keys keys;
333 u16 qoffset = 0;
334 u16 qcount = num_tx_queues;
335
336 if (skb_rx_queue_recorded(skb)) {
337 hash = skb_get_rx_queue(skb);
338 while (unlikely(hash >= num_tx_queues))
339 hash -= num_tx_queues;
340 return hash;
341 }
342
343 if (dev->num_tc) {
344 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
345 qoffset = dev->tc_to_txq[tc].offset;
346 qcount = dev->tc_to_txq[tc].count;
347 }
348 622
349 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; 623 return ___skb_get_hash(skb, &keys, perturb);
350} 624}
351EXPORT_SYMBOL(__skb_tx_hash); 625EXPORT_SYMBOL(skb_get_hash_perturb);
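A hedged usage sketch for the new perturbed hash (the classifier below is hypothetical; reciprocal_scale() is the existing kernel helper): a queueing discipline can keep its own random perturbation so its bucket mapping is decoupled from the global hashrnd that feeds skb->hash:

	static u32 my_hash_bucket(const struct sk_buff *skb, u32 perturbation,
				  u32 nr_buckets)
	{
		/* Always re-dissects and hashes with 'perturbation' as the
		 * jhash seed, so the result neither matches nor leaks the
		 * hashrnd-based skb->hash.
		 */
		return reciprocal_scale(skb_get_hash_perturb(skb, perturbation),
					nr_buckets);
	}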
352 626
353u32 __skb_get_poff(const struct sk_buff *skb, void *data, 627u32 __skb_get_poff(const struct sk_buff *skb, void *data,
354 const struct flow_keys *keys, int hlen) 628 const struct flow_keys *keys, int hlen)
355{ 629{
356 u32 poff = keys->thoff; 630 u32 poff = keys->control.thoff;
357 631
358 switch (keys->ip_proto) { 632 switch (keys->basic.ip_proto) {
359 case IPPROTO_TCP: { 633 case IPPROTO_TCP: {
360 /* access doff as u8 to avoid unaligned access */ 634 /* access doff as u8 to avoid unaligned access */
361 const u8 *doff; 635 const u8 *doff;
@@ -396,8 +670,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
396 return poff; 670 return poff;
397} 671}
398 672
399/* skb_get_poff() returns the offset to the payload as far as it could 673/**
400 * be dissected. The main user is currently BPF, so that we can dynamically 674 * skb_get_poff - get the offset to the payload
675 * @skb: sk_buff to get the payload offset from
676 *
677 * The function will get the offset to the payload as far as it could
678 * be dissected. The main user is currently BPF, so that we can dynamically
401 * truncate packets without needing to push actual payload to the user 679 * truncate packets without needing to push actual payload to the user
402 * space and can analyze headers only, instead. 680 * space and can analyze headers only, instead.
403 */ 681 */
@@ -405,86 +683,76 @@ u32 skb_get_poff(const struct sk_buff *skb)
405{ 683{
406 struct flow_keys keys; 684 struct flow_keys keys;
407 685
408 if (!skb_flow_dissect(skb, &keys)) 686 if (!skb_flow_dissect_flow_keys(skb, &keys))
409 return 0; 687 return 0;
410 688
411 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); 689 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
412} 690}
413 691
414static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) 692static const struct flow_dissector_key flow_keys_dissector_keys[] = {
693 {
694 .key_id = FLOW_DISSECTOR_KEY_CONTROL,
695 .offset = offsetof(struct flow_keys, control),
696 },
697 {
698 .key_id = FLOW_DISSECTOR_KEY_BASIC,
699 .offset = offsetof(struct flow_keys, basic),
700 },
701 {
702 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
703 .offset = offsetof(struct flow_keys, addrs.v4addrs),
704 },
705 {
706 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
707 .offset = offsetof(struct flow_keys, addrs.v6addrs),
708 },
709 {
710 .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
711 .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
712 },
713 {
714 .key_id = FLOW_DISSECTOR_KEY_PORTS,
715 .offset = offsetof(struct flow_keys, ports),
716 },
717 {
718 .key_id = FLOW_DISSECTOR_KEY_VLANID,
719 .offset = offsetof(struct flow_keys, tags),
720 },
721 {
722 .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
723 .offset = offsetof(struct flow_keys, tags),
724 },
725 {
726 .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
727 .offset = offsetof(struct flow_keys, keyid),
728 },
729};
730
731static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
732 {
733 .key_id = FLOW_DISSECTOR_KEY_CONTROL,
734 .offset = offsetof(struct flow_keys, control),
735 },
736 {
737 .key_id = FLOW_DISSECTOR_KEY_BASIC,
738 .offset = offsetof(struct flow_keys, basic),
739 },
740};
741
742struct flow_dissector flow_keys_dissector __read_mostly;
743EXPORT_SYMBOL(flow_keys_dissector);
744
745struct flow_dissector flow_keys_buf_dissector __read_mostly;
746
747static int __init init_default_flow_dissectors(void)
415{ 748{
416#ifdef CONFIG_XPS 749 skb_flow_dissector_init(&flow_keys_dissector,
417 struct xps_dev_maps *dev_maps; 750 flow_keys_dissector_keys,
418 struct xps_map *map; 751 ARRAY_SIZE(flow_keys_dissector_keys));
419 int queue_index = -1; 752 skb_flow_dissector_init(&flow_keys_buf_dissector,
420 753 flow_keys_buf_dissector_keys,
421 rcu_read_lock(); 754 ARRAY_SIZE(flow_keys_buf_dissector_keys));
422 dev_maps = rcu_dereference(dev->xps_maps); 755 return 0;
423 if (dev_maps) {
424 map = rcu_dereference(
425 dev_maps->cpu_map[skb->sender_cpu - 1]);
426 if (map) {
427 if (map->len == 1)
428 queue_index = map->queues[0];
429 else
430 queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
431 map->len)];
432 if (unlikely(queue_index >= dev->real_num_tx_queues))
433 queue_index = -1;
434 }
435 }
436 rcu_read_unlock();
437
438 return queue_index;
439#else
440 return -1;
441#endif
442}
443
444static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
445{
446 struct sock *sk = skb->sk;
447 int queue_index = sk_tx_queue_get(sk);
448
449 if (queue_index < 0 || skb->ooo_okay ||
450 queue_index >= dev->real_num_tx_queues) {
451 int new_index = get_xps_queue(dev, skb);
452 if (new_index < 0)
453 new_index = skb_tx_hash(dev, skb);
454
455 if (queue_index != new_index && sk &&
456 rcu_access_pointer(sk->sk_dst_cache))
457 sk_tx_queue_set(sk, new_index);
458
459 queue_index = new_index;
460 }
461
462 return queue_index;
463} 756}
464 757
465struct netdev_queue *netdev_pick_tx(struct net_device *dev, 758late_initcall_sync(init_default_flow_dissectors);
466 struct sk_buff *skb,
467 void *accel_priv)
468{
469 int queue_index = 0;
470
471#ifdef CONFIG_XPS
472 if (skb->sender_cpu == 0)
473 skb->sender_cpu = raw_smp_processor_id() + 1;
474#endif
475
476 if (dev->real_num_tx_queues != 1) {
477 const struct net_device_ops *ops = dev->netdev_ops;
478 if (ops->ndo_select_queue)
479 queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
480 __netdev_pick_tx);
481 else
482 queue_index = __netdev_pick_tx(dev, skb);
483
484 if (!accel_priv)
485 queue_index = netdev_cap_txqueue(dev, queue_index);
486 }
487
488 skb_set_queue_mapping(skb, queue_index);
489 return netdev_get_tx_queue(dev, queue_index);
490}
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9dfb88a933e7..92d886f4adcb 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,7 +66,7 @@
66 66
67 NOTES. 67 NOTES.
68 68
69 * avbps is scaled by 2^5, avpps is scaled by 2^10. 69 * avbps and avpps are scaled by 2^5.
70 * both values are reported as 32 bit unsigned values. bps can 70 * both values are reported as 32 bit unsigned values. bps can
71 overflow for fast links : max speed being 34360Mbit/sec 71 overflow for fast links : max speed being 34360Mbit/sec
72 * Minimal interval is HZ/4=250msec (it is the greatest common divisor 72 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
@@ -85,10 +85,10 @@ struct gen_estimator
85 struct gnet_stats_rate_est64 *rate_est; 85 struct gnet_stats_rate_est64 *rate_est;
86 spinlock_t *stats_lock; 86 spinlock_t *stats_lock;
87 int ewma_log; 87 int ewma_log;
88 u32 last_packets;
89 unsigned long avpps;
88 u64 last_bytes; 90 u64 last_bytes;
89 u64 avbps; 91 u64 avbps;
90 u32 last_packets;
91 u32 avpps;
92 struct rcu_head e_rcu; 92 struct rcu_head e_rcu;
93 struct rb_node node; 93 struct rb_node node;
94 struct gnet_stats_basic_cpu __percpu *cpu_bstats; 94 struct gnet_stats_basic_cpu __percpu *cpu_bstats;
@@ -118,8 +118,8 @@ static void est_timer(unsigned long arg)
118 rcu_read_lock(); 118 rcu_read_lock();
119 list_for_each_entry_rcu(e, &elist[idx].list, list) { 119 list_for_each_entry_rcu(e, &elist[idx].list, list) {
120 struct gnet_stats_basic_packed b = {0}; 120 struct gnet_stats_basic_packed b = {0};
121 unsigned long rate;
121 u64 brate; 122 u64 brate;
122 u32 rate;
123 123
124 spin_lock(e->stats_lock); 124 spin_lock(e->stats_lock);
125 read_lock(&est_lock); 125 read_lock(&est_lock);
@@ -133,10 +133,11 @@ static void est_timer(unsigned long arg)
133 e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); 133 e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
134 e->rate_est->bps = (e->avbps+0xF)>>5; 134 e->rate_est->bps = (e->avbps+0xF)>>5;
135 135
136 rate = (b.packets - e->last_packets)<<(12 - idx); 136 rate = b.packets - e->last_packets;
137 rate <<= (7 - idx);
137 e->last_packets = b.packets; 138 e->last_packets = b.packets;
138 e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); 139 e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
139 e->rate_est->pps = (e->avpps+0x1FF)>>10; 140 e->rate_est->pps = (e->avpps + 0xF) >> 5;
140skip: 141skip:
141 read_unlock(&est_lock); 142 read_unlock(&est_lock);
142 spin_unlock(e->stats_lock); 143 spin_unlock(e->stats_lock);
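Worked example for the rescaled pps math above: idx selects the timer interval ((HZ/4) << idx), so for the 250 ms interval (idx = 0) a delta of 1000 packets becomes rate = 1000 << 7 = 128000, i.e. 4000 packets/sec times the 2^5 fixed-point scale; reporting then rounds and drops the scale with (avpps + 0xF) >> 5. Widening avpps and rate to unsigned long is what lets these larger intermediate values fit without overflow.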
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3de654256028..84195dacb8b6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -913,6 +913,7 @@ static void neigh_timer_handler(unsigned long arg)
913 neigh->nud_state = NUD_PROBE; 913 neigh->nud_state = NUD_PROBE;
914 neigh->updated = jiffies; 914 neigh->updated = jiffies;
915 atomic_set(&neigh->probes, 0); 915 atomic_set(&neigh->probes, 0);
916 notify = 1;
916 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); 917 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
917 } 918 }
918 } else { 919 } else {
@@ -957,6 +958,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
957 rc = 0; 958 rc = 0;
958 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) 959 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
959 goto out_unlock_bh; 960 goto out_unlock_bh;
961 if (neigh->dead)
962 goto out_dead;
960 963
961 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { 964 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
962 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + 965 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
@@ -1013,6 +1016,13 @@ out_unlock_bh:
1013 write_unlock(&neigh->lock); 1016 write_unlock(&neigh->lock);
1014 local_bh_enable(); 1017 local_bh_enable();
1015 return rc; 1018 return rc;
1019
1020out_dead:
1021 if (neigh->nud_state & NUD_STALE)
1022 goto out_unlock_bh;
1023 write_unlock_bh(&neigh->lock);
1024 kfree_skb(skb);
1025 return 1;
1016} 1026}
1017EXPORT_SYMBOL(__neigh_event_send); 1027EXPORT_SYMBOL(__neigh_event_send);
1018 1028
@@ -1076,6 +1086,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1076 if (!(flags & NEIGH_UPDATE_F_ADMIN) && 1086 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1077 (old & (NUD_NOARP | NUD_PERMANENT))) 1087 (old & (NUD_NOARP | NUD_PERMANENT)))
1078 goto out; 1088 goto out;
1089 if (neigh->dead)
1090 goto out;
1079 1091
1080 if (!(new & NUD_VALID)) { 1092 if (!(new & NUD_VALID)) {
1081 neigh_del_timer(neigh); 1093 neigh_del_timer(neigh);
@@ -1144,6 +1156,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1144 1156
1145 if (new != old) { 1157 if (new != old) {
1146 neigh_del_timer(neigh); 1158 neigh_del_timer(neigh);
1159 if (new & NUD_PROBE)
1160 atomic_set(&neigh->probes, 0);
1147 if (new & NUD_IN_TIMER) 1161 if (new & NUD_IN_TIMER)
1148 neigh_add_timer(neigh, (jiffies + 1162 neigh_add_timer(neigh, (jiffies +
1149 ((new & NUD_REACHABLE) ? 1163 ((new & NUD_REACHABLE) ?
@@ -1225,6 +1239,8 @@ EXPORT_SYMBOL(neigh_update);
1225 */ 1239 */
1226void __neigh_set_probe_once(struct neighbour *neigh) 1240void __neigh_set_probe_once(struct neighbour *neigh)
1227{ 1241{
1242 if (neigh->dead)
1243 return;
1228 neigh->updated = jiffies; 1244 neigh->updated = jiffies;
1229 if (!(neigh->nud_state & NUD_FAILED)) 1245 if (!(neigh->nud_state & NUD_FAILED))
1230 return; 1246 return;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4238d6da5c60..18b34d771ed4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -458,11 +458,15 @@ static ssize_t phys_switch_id_show(struct device *dev,
458 return restart_syscall(); 458 return restart_syscall();
459 459
460 if (dev_isalive(netdev)) { 460 if (dev_isalive(netdev)) {
461 struct netdev_phys_item_id ppid; 461 struct switchdev_attr attr = {
462 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
463 .flags = SWITCHDEV_F_NO_RECURSE,
464 };
462 465
463 ret = netdev_switch_parent_id_get(netdev, &ppid); 466 ret = switchdev_port_attr_get(netdev, &attr);
464 if (!ret) 467 if (!ret)
465 ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); 468 ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
469 attr.u.ppid.id);
466 } 470 }
467 rtnl_unlock(); 471 rtnl_unlock();
468 472
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 572af0011997..2c2eb1b629b1 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -147,24 +147,17 @@ static void ops_free_list(const struct pernet_operations *ops,
147 } 147 }
148} 148}
149 149
150static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, 150/* should be called with nsid_lock held */
151 int id);
152static int alloc_netid(struct net *net, struct net *peer, int reqid) 151static int alloc_netid(struct net *net, struct net *peer, int reqid)
153{ 152{
154 int min = 0, max = 0, id; 153 int min = 0, max = 0;
155
156 ASSERT_RTNL();
157 154
158 if (reqid >= 0) { 155 if (reqid >= 0) {
159 min = reqid; 156 min = reqid;
160 max = reqid + 1; 157 max = reqid + 1;
161 } 158 }
162 159
163 id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); 160 return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
164 if (id >= 0)
165 rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
166
167 return id;
168} 161}
169 162
170/* This function is used by idr_for_each(). If net is equal to peer, the 163/* This function is used by idr_for_each(). If net is equal to peer, the
@@ -180,11 +173,16 @@ static int net_eq_idr(int id, void *net, void *peer)
180 return 0; 173 return 0;
181} 174}
182 175
183static int __peernet2id(struct net *net, struct net *peer, bool alloc) 176/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
	177 * is set to true, so the caller knows that the new id must be notified via
178 * rtnl.
179 */
180static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
184{ 181{
185 int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); 182 int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
183 bool alloc_it = *alloc;
186 184
187 ASSERT_RTNL(); 185 *alloc = false;
188 186
189 /* Magic value for id 0. */ 187 /* Magic value for id 0. */
190 if (id == NET_ID_ZERO) 188 if (id == NET_ID_ZERO)
@@ -192,36 +190,77 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
192 if (id > 0) 190 if (id > 0)
193 return id; 191 return id;
194 192
195 if (alloc) 193 if (alloc_it) {
196 return alloc_netid(net, peer, -1); 194 id = alloc_netid(net, peer, -1);
195 *alloc = true;
196 return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
197 }
198
199 return NETNSA_NSID_NOT_ASSIGNED;
200}
201
202/* should be called with nsid_lock held */
203static int __peernet2id(struct net *net, struct net *peer)
204{
205 bool no = false;
197 206
198 return -ENOENT; 207 return __peernet2id_alloc(net, peer, &no);
199} 208}
200 209
210static void rtnl_net_notifyid(struct net *net, int cmd, int id);
201/* This function returns the id of a peer netns. If no id is assigned, one will 211/* This function returns the id of a peer netns. If no id is assigned, one will
202 * be allocated and returned. 212 * be allocated and returned.
203 */ 213 */
214int peernet2id_alloc(struct net *net, struct net *peer)
215{
216 unsigned long flags;
217 bool alloc;
218 int id;
219
220 spin_lock_irqsave(&net->nsid_lock, flags);
221 alloc = atomic_read(&peer->count) == 0 ? false : true;
222 id = __peernet2id_alloc(net, peer, &alloc);
223 spin_unlock_irqrestore(&net->nsid_lock, flags);
224 if (alloc && id >= 0)
225 rtnl_net_notifyid(net, RTM_NEWNSID, id);
226 return id;
227}
228EXPORT_SYMBOL(peernet2id_alloc);
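A hedged usage sketch (the helper name is hypothetical; the pattern matches the rtnl_fill_ifinfo() change later in this diff): callers that need an id for a peer namespace, allocating one on demand, go through peernet2id_alloc(), which takes nsid_lock itself and sends the RTM_NEWNSID notification only after dropping the spinlock:

	static int put_link_netnsid(struct sk_buff *skb, struct net *net,
				    struct net *link_net)
	{
		/* May assign a fresh id and emit RTM_NEWNSID as a side
		 * effect, outside nsid_lock.
		 */
		int id = peernet2id_alloc(net, link_net);

		return nla_put_s32(skb, IFLA_LINK_NETNSID, id);
	}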
229
230/* This function returns, if assigned, the id of a peer netns. */
204int peernet2id(struct net *net, struct net *peer) 231int peernet2id(struct net *net, struct net *peer)
205{ 232{
206 bool alloc = atomic_read(&peer->count) == 0 ? false : true; 233 unsigned long flags;
207 int id; 234 int id;
208 235
209 id = __peernet2id(net, peer, alloc); 236 spin_lock_irqsave(&net->nsid_lock, flags);
210 return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; 237 id = __peernet2id(net, peer);
238 spin_unlock_irqrestore(&net->nsid_lock, flags);
239 return id;
240}
241
	242/* This function returns true if the peer netns has an id assigned in the
243 * current netns.
244 */
245bool peernet_has_id(struct net *net, struct net *peer)
246{
247 return peernet2id(net, peer) >= 0;
211} 248}
212EXPORT_SYMBOL(peernet2id);
213 249
214struct net *get_net_ns_by_id(struct net *net, int id) 250struct net *get_net_ns_by_id(struct net *net, int id)
215{ 251{
252 unsigned long flags;
216 struct net *peer; 253 struct net *peer;
217 254
218 if (id < 0) 255 if (id < 0)
219 return NULL; 256 return NULL;
220 257
221 rcu_read_lock(); 258 rcu_read_lock();
259 spin_lock_irqsave(&net->nsid_lock, flags);
222 peer = idr_find(&net->netns_ids, id); 260 peer = idr_find(&net->netns_ids, id);
223 if (peer) 261 if (peer)
224 get_net(peer); 262 get_net(peer);
263 spin_unlock_irqrestore(&net->nsid_lock, flags);
225 rcu_read_unlock(); 264 rcu_read_unlock();
226 265
227 return peer; 266 return peer;
@@ -242,6 +281,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
242 net->dev_base_seq = 1; 281 net->dev_base_seq = 1;
243 net->user_ns = user_ns; 282 net->user_ns = user_ns;
244 idr_init(&net->netns_ids); 283 idr_init(&net->netns_ids);
284 spin_lock_init(&net->nsid_lock);
245 285
246 list_for_each_entry(ops, &pernet_list, list) { 286 list_for_each_entry(ops, &pernet_list, list) {
247 error = ops_init(ops, net); 287 error = ops_init(ops, net);
@@ -362,14 +402,19 @@ static void cleanup_net(struct work_struct *work)
362 list_del_rcu(&net->list); 402 list_del_rcu(&net->list);
363 list_add_tail(&net->exit_list, &net_exit_list); 403 list_add_tail(&net->exit_list, &net_exit_list);
364 for_each_net(tmp) { 404 for_each_net(tmp) {
365 int id = __peernet2id(tmp, net, false); 405 int id;
366 406
367 if (id >= 0) { 407 spin_lock_irq(&tmp->nsid_lock);
368 rtnl_net_notifyid(tmp, net, RTM_DELNSID, id); 408 id = __peernet2id(tmp, net);
409 if (id >= 0)
369 idr_remove(&tmp->netns_ids, id); 410 idr_remove(&tmp->netns_ids, id);
370 } 411 spin_unlock_irq(&tmp->nsid_lock);
412 if (id >= 0)
413 rtnl_net_notifyid(tmp, RTM_DELNSID, id);
371 } 414 }
415 spin_lock_irq(&net->nsid_lock);
372 idr_destroy(&net->netns_ids); 416 idr_destroy(&net->netns_ids);
417 spin_unlock_irq(&net->nsid_lock);
373 418
374 } 419 }
375 rtnl_unlock(); 420 rtnl_unlock();
@@ -497,6 +542,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
497{ 542{
498 struct net *net = sock_net(skb->sk); 543 struct net *net = sock_net(skb->sk);
499 struct nlattr *tb[NETNSA_MAX + 1]; 544 struct nlattr *tb[NETNSA_MAX + 1];
545 unsigned long flags;
500 struct net *peer; 546 struct net *peer;
501 int nsid, err; 547 int nsid, err;
502 548
@@ -517,14 +563,19 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
517 if (IS_ERR(peer)) 563 if (IS_ERR(peer))
518 return PTR_ERR(peer); 564 return PTR_ERR(peer);
519 565
520 if (__peernet2id(net, peer, false) >= 0) { 566 spin_lock_irqsave(&net->nsid_lock, flags);
567 if (__peernet2id(net, peer) >= 0) {
568 spin_unlock_irqrestore(&net->nsid_lock, flags);
521 err = -EEXIST; 569 err = -EEXIST;
522 goto out; 570 goto out;
523 } 571 }
524 572
525 err = alloc_netid(net, peer, nsid); 573 err = alloc_netid(net, peer, nsid);
526 if (err > 0) 574 spin_unlock_irqrestore(&net->nsid_lock, flags);
575 if (err >= 0) {
576 rtnl_net_notifyid(net, RTM_NEWNSID, err);
527 err = 0; 577 err = 0;
578 }
528out: 579out:
529 put_net(peer); 580 put_net(peer);
530 return err; 581 return err;
@@ -538,14 +589,10 @@ static int rtnl_net_get_size(void)
538} 589}
539 590
540static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, 591static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
541 int cmd, struct net *net, struct net *peer, 592 int cmd, struct net *net, int nsid)
542 int nsid)
543{ 593{
544 struct nlmsghdr *nlh; 594 struct nlmsghdr *nlh;
545 struct rtgenmsg *rth; 595 struct rtgenmsg *rth;
546 int id;
547
548 ASSERT_RTNL();
549 596
550 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); 597 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
551 if (!nlh) 598 if (!nlh)
@@ -554,14 +601,7 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
554 rth = nlmsg_data(nlh); 601 rth = nlmsg_data(nlh);
555 rth->rtgen_family = AF_UNSPEC; 602 rth->rtgen_family = AF_UNSPEC;
556 603
557 if (nsid >= 0) { 604 if (nla_put_s32(skb, NETNSA_NSID, nsid))
558 id = nsid;
559 } else {
560 id = __peernet2id(net, peer, false);
561 if (id < 0)
562 id = NETNSA_NSID_NOT_ASSIGNED;
563 }
564 if (nla_put_s32(skb, NETNSA_NSID, id))
565 goto nla_put_failure; 605 goto nla_put_failure;
566 606
567 nlmsg_end(skb, nlh); 607 nlmsg_end(skb, nlh);
@@ -578,7 +618,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
578 struct nlattr *tb[NETNSA_MAX + 1]; 618 struct nlattr *tb[NETNSA_MAX + 1];
579 struct sk_buff *msg; 619 struct sk_buff *msg;
580 struct net *peer; 620 struct net *peer;
581 int err; 621 int err, id;
582 622
583 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, 623 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
584 rtnl_net_policy); 624 rtnl_net_policy);
@@ -600,8 +640,9 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
600 goto out; 640 goto out;
601 } 641 }
602 642
643 id = peernet2id(net, peer);
603 err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 644 err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
604 RTM_NEWNSID, net, peer, -1); 645 RTM_NEWNSID, net, id);
605 if (err < 0) 646 if (err < 0)
606 goto err_out; 647 goto err_out;
607 648
@@ -633,7 +674,7 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data)
633 674
634 ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, 675 ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
635 net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, 676 net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
636 RTM_NEWNSID, net_cb->net, peer, id); 677 RTM_NEWNSID, net_cb->net, id);
637 if (ret < 0) 678 if (ret < 0)
638 return ret; 679 return ret;
639 680
@@ -652,17 +693,17 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
652 .idx = 0, 693 .idx = 0,
653 .s_idx = cb->args[0], 694 .s_idx = cb->args[0],
654 }; 695 };
696 unsigned long flags;
655 697
656 ASSERT_RTNL(); 698 spin_lock_irqsave(&net->nsid_lock, flags);
657
658 idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); 699 idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
700 spin_unlock_irqrestore(&net->nsid_lock, flags);
659 701
660 cb->args[0] = net_cb.idx; 702 cb->args[0] = net_cb.idx;
661 return skb->len; 703 return skb->len;
662} 704}
663 705
664static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, 706static void rtnl_net_notifyid(struct net *net, int cmd, int id)
665 int id)
666{ 707{
667 struct sk_buff *msg; 708 struct sk_buff *msg;
668 int err = -ENOMEM; 709 int err = -ENOMEM;
@@ -671,7 +712,7 @@ static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
671 if (!msg) 712 if (!msg)
672 goto out; 713 goto out;
673 714
674 err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id); 715 err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
675 if (err < 0) 716 if (err < 0)
676 goto err_out; 717 goto err_out;
677 718
diff --git a/net/core/netevent.c b/net/core/netevent.c
index f17ccd291d39..8b3bc4fac613 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -31,10 +31,7 @@ static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);
31 */ 31 */
32int register_netevent_notifier(struct notifier_block *nb) 32int register_netevent_notifier(struct notifier_block *nb)
33{ 33{
34 int err; 34 return atomic_notifier_chain_register(&netevent_notif_chain, nb);
35
36 err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
37 return err;
38} 35}
39EXPORT_SYMBOL_GPL(register_netevent_notifier); 36EXPORT_SYMBOL_GPL(register_netevent_notifier);
40 37
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 508155b283dd..1ebdf1c0d118 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -177,7 +177,7 @@
177#include <asm/dma.h> 177#include <asm/dma.h>
178#include <asm/div64.h> /* do_div */ 178#include <asm/div64.h> /* do_div */
179 179
180#define VERSION "2.74" 180#define VERSION "2.75"
181#define IP_NAME_SZ 32 181#define IP_NAME_SZ 32
182#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 182#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
183#define MPLS_STACK_BOTTOM htonl(0x00000100) 183#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -210,6 +210,10 @@
210#define T_REMDEVALL (1<<2) /* Remove all devs */ 210#define T_REMDEVALL (1<<2) /* Remove all devs */
211#define T_REMDEV (1<<3) /* Remove one dev */ 211#define T_REMDEV (1<<3) /* Remove one dev */
212 212
213/* Xmit modes */
214#define M_START_XMIT 0 /* Default normal TX */
215#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
216
213/* If lock -- protects updating of if_list */ 217/* If lock -- protects updating of if_list */
214#define if_lock(t) spin_lock(&(t->if_lock)); 218#define if_lock(t) spin_lock(&(t->if_lock));
215#define if_unlock(t) spin_unlock(&(t->if_lock)); 219#define if_unlock(t) spin_unlock(&(t->if_lock));
@@ -251,13 +255,14 @@ struct pktgen_dev {
251 * we will do a random selection from within the range. 255 * we will do a random selection from within the range.
252 */ 256 */
253 __u32 flags; 257 __u32 flags;
254 int removal_mark; /* non-zero => the device is marked for 258 int xmit_mode;
255 * removal by worker thread */
256
257 int min_pkt_size; 259 int min_pkt_size;
258 int max_pkt_size; 260 int max_pkt_size;
259 int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ 261 int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
260 int nfrags; 262 int nfrags;
263 int removal_mark; /* non-zero => the device is marked for
264 * removal by worker thread */
265
261 struct page *page; 266 struct page *page;
262 u64 delay; /* nano-seconds */ 267 u64 delay; /* nano-seconds */
263 268
@@ -507,7 +512,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
507 pktgen_reset_all_threads(pn); 512 pktgen_reset_all_threads(pn);
508 513
509 else 514 else
510 pr_warn("Unknown command: %s\n", data); 515 return -EINVAL;
511 516
512 return count; 517 return count;
513} 518}
@@ -567,7 +572,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
567 " dst_min: %s dst_max: %s\n", 572 " dst_min: %s dst_max: %s\n",
568 pkt_dev->dst_min, pkt_dev->dst_max); 573 pkt_dev->dst_min, pkt_dev->dst_max);
569 seq_printf(seq, 574 seq_printf(seq,
570 " src_min: %s src_max: %s\n", 575 " src_min: %s src_max: %s\n",
571 pkt_dev->src_min, pkt_dev->src_max); 576 pkt_dev->src_min, pkt_dev->src_max);
572 } 577 }
573 578
@@ -620,6 +625,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
620 if (pkt_dev->node >= 0) 625 if (pkt_dev->node >= 0)
621 seq_printf(seq, " node: %d\n", pkt_dev->node); 626 seq_printf(seq, " node: %d\n", pkt_dev->node);
622 627
628 if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
629 seq_puts(seq, " xmit_mode: netif_receive\n");
630
623 seq_puts(seq, " Flags: "); 631 seq_puts(seq, " Flags: ");
624 632
625 if (pkt_dev->flags & F_IPV6) 633 if (pkt_dev->flags & F_IPV6)
@@ -1081,7 +1089,8 @@ static ssize_t pktgen_if_write(struct file *file,
1081 if (len < 0) 1089 if (len < 0)
1082 return len; 1090 return len;
1083 if ((value > 0) && 1091 if ((value > 0) &&
1084 (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) 1092 ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
1093 !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
1085 return -ENOTSUPP; 1094 return -ENOTSUPP;
1086 i += len; 1095 i += len;
1087 pkt_dev->clone_skb = value; 1096 pkt_dev->clone_skb = value;
@@ -1134,7 +1143,7 @@ static ssize_t pktgen_if_write(struct file *file,
1134 return len; 1143 return len;
1135 1144
1136 i += len; 1145 i += len;
1137 if ((value > 1) && 1146 if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
1138 (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) 1147 (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
1139 return -ENOTSUPP; 1148 return -ENOTSUPP;
1140 pkt_dev->burst = value < 1 ? 1 : value; 1149 pkt_dev->burst = value < 1 ? 1 : value;
@@ -1160,6 +1169,45 @@ static ssize_t pktgen_if_write(struct file *file,
1160 sprintf(pg_result, "ERROR: node not possible"); 1169 sprintf(pg_result, "ERROR: node not possible");
1161 return count; 1170 return count;
1162 } 1171 }
1172 if (!strcmp(name, "xmit_mode")) {
1173 char f[32];
1174
1175 memset(f, 0, 32);
1176 len = strn_len(&user_buffer[i], sizeof(f) - 1);
1177 if (len < 0)
1178 return len;
1179
1180 if (copy_from_user(f, &user_buffer[i], len))
1181 return -EFAULT;
1182 i += len;
1183
1184 if (strcmp(f, "start_xmit") == 0) {
1185 pkt_dev->xmit_mode = M_START_XMIT;
1186 } else if (strcmp(f, "netif_receive") == 0) {
1187 /* clone_skb set earlier, not supported in this mode */
1188 if (pkt_dev->clone_skb > 0)
1189 return -ENOTSUPP;
1190
1191 pkt_dev->xmit_mode = M_NETIF_RECEIVE;
1192
1193 /* make sure new packet is allocated every time
1194 * pktgen_xmit() is called
1195 */
1196 pkt_dev->last_ok = 1;
1197
1198 /* override clone_skb if user passed default value
1199 * at module loading time
1200 */
1201 pkt_dev->clone_skb = 0;
1202 } else {
1203 sprintf(pg_result,
1204 "xmit_mode -:%s:- unknown\nAvailable modes: %s",
1205 f, "start_xmit, netif_receive\n");
1206 return count;
1207 }
1208 sprintf(pg_result, "OK: xmit_mode=%s", f);
1209 return count;
1210 }
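For example, assuming the usual per-device pktgen procfs entry, the new mode would be selected by writing "xmit_mode netif_receive" to /proc/net/pktgen/<dev>, after which generated packets are injected into the local stack via netif_receive_skb() instead of being handed to the driver's start_xmit; writing "xmit_mode start_xmit" restores the default behaviour.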
1163 if (!strcmp(name, "flag")) { 1211 if (!strcmp(name, "flag")) {
1164 char f[32]; 1212 char f[32];
1165 memset(f, 0, 32); 1213 memset(f, 0, 32);
@@ -1267,6 +1315,9 @@ static ssize_t pktgen_if_write(struct file *file,
1267 else if (strcmp(f, "NO_TIMESTAMP") == 0) 1315 else if (strcmp(f, "NO_TIMESTAMP") == 0)
1268 pkt_dev->flags |= F_NO_TIMESTAMP; 1316 pkt_dev->flags |= F_NO_TIMESTAMP;
1269 1317
1318 else if (strcmp(f, "!NO_TIMESTAMP") == 0)
1319 pkt_dev->flags &= ~F_NO_TIMESTAMP;
1320
1270 else { 1321 else {
1271 sprintf(pg_result, 1322 sprintf(pg_result,
1272 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1323 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -2212,8 +2263,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2212 do { 2263 do {
2213 set_current_state(TASK_INTERRUPTIBLE); 2264 set_current_state(TASK_INTERRUPTIBLE);
2214 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); 2265 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
2215 if (!hrtimer_active(&t.timer))
2216 t.task = NULL;
2217 2266
2218 if (likely(t.task)) 2267 if (likely(t.task))
2219 schedule(); 2268 schedule();
@@ -2594,9 +2643,9 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2594 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; 2643 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
2595 int nhead = 0; 2644 int nhead = 0;
2596 if (x) { 2645 if (x) {
2597 int ret; 2646 struct ethhdr *eth;
2598 __u8 *eth;
2599 struct iphdr *iph; 2647 struct iphdr *iph;
2648 int ret;
2600 2649
2601 nhead = x->props.header_len - skb_headroom(skb); 2650 nhead = x->props.header_len - skb_headroom(skb);
2602 if (nhead > 0) { 2651 if (nhead > 0) {
@@ -2616,9 +2665,9 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2616 goto err; 2665 goto err;
2617 } 2666 }
2618 /* restore ll */ 2667 /* restore ll */
2619 eth = (__u8 *) skb_push(skb, ETH_HLEN); 2668 eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
2620 memcpy(eth, pkt_dev->hh, 12); 2669 memcpy(eth, pkt_dev->hh, 2 * ETH_ALEN);
2621 *(u16 *) &eth[12] = protocol; 2670 eth->h_proto = protocol;
2622 2671
2623 /* Update IPv4 header len as well as checksum value */ 2672 /* Update IPv4 header len as well as checksum value */
2624 iph = ip_hdr(skb); 2673 iph = ip_hdr(skb);
@@ -3317,6 +3366,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3317 unsigned int burst = ACCESS_ONCE(pkt_dev->burst); 3366 unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
3318 struct net_device *odev = pkt_dev->odev; 3367 struct net_device *odev = pkt_dev->odev;
3319 struct netdev_queue *txq; 3368 struct netdev_queue *txq;
3369 struct sk_buff *skb;
3320 int ret; 3370 int ret;
3321 3371
3322 /* If device is offline, then don't send */ 3372 /* If device is offline, then don't send */
@@ -3354,6 +3404,37 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3354 if (pkt_dev->delay && pkt_dev->last_ok) 3404 if (pkt_dev->delay && pkt_dev->last_ok)
3355 spin(pkt_dev, pkt_dev->next_tx); 3405 spin(pkt_dev, pkt_dev->next_tx);
3356 3406
3407 if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
3408 skb = pkt_dev->skb;
3409 skb->protocol = eth_type_trans(skb, skb->dev);
3410 atomic_add(burst, &skb->users);
3411 local_bh_disable();
3412 do {
3413 ret = netif_receive_skb(skb);
3414 if (ret == NET_RX_DROP)
3415 pkt_dev->errors++;
3416 pkt_dev->sofar++;
3417 pkt_dev->seq_num++;
3418 if (atomic_read(&skb->users) != burst) {
3419 /* skb was queued by rps/rfs or taps,
	3420			 * so we cannot reuse this skb
3421 */
3422 atomic_sub(burst - 1, &skb->users);
3423 /* get out of the loop and wait
3424 * until skb is consumed
3425 */
3426 break;
3427 }
	3428			/* skb was 'freed' by the stack, so clean a few
3429 * bits and reuse it
3430 */
3431#ifdef CONFIG_NET_CLS_ACT
3432 skb->tc_verd = 0; /* reset reclass/redir ttl */
3433#endif
3434 } while (--burst > 0);
3435 goto out; /* Skips xmit_mode M_START_XMIT */
3436 }
3437
3357 txq = skb_get_tx_queue(odev, pkt_dev->skb); 3438 txq = skb_get_tx_queue(odev, pkt_dev->skb);
3358 3439
3359 local_bh_disable(); 3440 local_bh_disable();
@@ -3401,6 +3482,7 @@ xmit_more:
3401unlock: 3482unlock:
3402 HARD_TX_UNLOCK(odev, txq); 3483 HARD_TX_UNLOCK(odev, txq);
3403 3484
3485out:
3404 local_bh_enable(); 3486 local_bh_enable();
3405 3487
3406 /* If pkt_dev->count is zero, then run forever */ 3488 /* If pkt_dev->count is zero, then run forever */
@@ -3489,13 +3571,6 @@ static int pktgen_thread_worker(void *arg)
3489 pr_debug("%s removing thread\n", t->tsk->comm); 3571 pr_debug("%s removing thread\n", t->tsk->comm);
3490 pktgen_rem_thread(t); 3572 pktgen_rem_thread(t);
3491 3573
3492 /* Wait for kthread_stop */
3493 while (!kthread_should_stop()) {
3494 set_current_state(TASK_INTERRUPTIBLE);
3495 schedule();
3496 }
3497 __set_current_state(TASK_RUNNING);
3498
3499 return 0; 3574 return 0;
3500} 3575}
3501 3576
@@ -3687,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
3687 } 3762 }
3688 3763
3689 t->net = pn; 3764 t->net = pn;
3765 get_task_struct(p);
3690 wake_up_process(p); 3766 wake_up_process(p);
3691 wait_for_completion(&t->start_done); 3767 wait_for_completion(&t->start_done);
3692 3768
@@ -3809,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net)
3809 t = list_entry(q, struct pktgen_thread, th_list); 3885 t = list_entry(q, struct pktgen_thread, th_list);
3810 list_del(&t->th_list); 3886 list_del(&t->th_list);
3811 kthread_stop(t->tsk); 3887 kthread_stop(t->tsk);
3888 put_task_struct(t->tsk);
3812 kfree(t); 3889 kfree(t);
3813 } 3890 }
3814 3891
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8de36824018d..9e433d58d265 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -819,7 +819,19 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
819 nla_total_size(sizeof(struct ifla_vf_spoofchk)) + 819 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
820 nla_total_size(sizeof(struct ifla_vf_rate)) + 820 nla_total_size(sizeof(struct ifla_vf_rate)) +
821 nla_total_size(sizeof(struct ifla_vf_link_state)) + 821 nla_total_size(sizeof(struct ifla_vf_link_state)) +
822 nla_total_size(sizeof(struct ifla_vf_rss_query_en))); 822 nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
823 /* IFLA_VF_STATS_RX_PACKETS */
824 nla_total_size(sizeof(__u64)) +
825 /* IFLA_VF_STATS_TX_PACKETS */
826 nla_total_size(sizeof(__u64)) +
827 /* IFLA_VF_STATS_RX_BYTES */
828 nla_total_size(sizeof(__u64)) +
829 /* IFLA_VF_STATS_TX_BYTES */
830 nla_total_size(sizeof(__u64)) +
831 /* IFLA_VF_STATS_BROADCAST */
832 nla_total_size(sizeof(__u64)) +
833 /* IFLA_VF_STATS_MULTICAST */
834 nla_total_size(sizeof(__u64)));
823 return size; 835 return size;
824 } else 836 } else
825 return 0; 837 return 0;
@@ -1004,16 +1016,20 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
1004static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) 1016static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
1005{ 1017{
1006 int err; 1018 int err;
1007 struct netdev_phys_item_id psid; 1019 struct switchdev_attr attr = {
1020 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1021 .flags = SWITCHDEV_F_NO_RECURSE,
1022 };
1008 1023
1009 err = netdev_switch_parent_id_get(dev, &psid); 1024 err = switchdev_port_attr_get(dev, &attr);
1010 if (err) { 1025 if (err) {
1011 if (err == -EOPNOTSUPP) 1026 if (err == -EOPNOTSUPP)
1012 return 0; 1027 return 0;
1013 return err; 1028 return err;
1014 } 1029 }
1015 1030
1016 if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id)) 1031 if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
1032 attr.u.ppid.id))
1017 return -EMSGSIZE; 1033 return -EMSGSIZE;
1018 1034
1019 return 0; 1035 return 0;
@@ -1119,7 +1135,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1119 && (ext_filter_mask & RTEXT_FILTER_VF)) { 1135 && (ext_filter_mask & RTEXT_FILTER_VF)) {
1120 int i; 1136 int i;
1121 1137
1122 struct nlattr *vfinfo, *vf; 1138 struct nlattr *vfinfo, *vf, *vfstats;
1123 int num_vfs = dev_num_vf(dev->dev.parent); 1139 int num_vfs = dev_num_vf(dev->dev.parent);
1124 1140
1125 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); 1141 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
@@ -1134,6 +1150,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1134 struct ifla_vf_spoofchk vf_spoofchk; 1150 struct ifla_vf_spoofchk vf_spoofchk;
1135 struct ifla_vf_link_state vf_linkstate; 1151 struct ifla_vf_link_state vf_linkstate;
1136 struct ifla_vf_rss_query_en vf_rss_query_en; 1152 struct ifla_vf_rss_query_en vf_rss_query_en;
1153 struct ifla_vf_stats vf_stats;
1137 1154
1138 /* 1155 /*
1139 * Not all SR-IOV capable drivers support the 1156 * Not all SR-IOV capable drivers support the
@@ -1186,6 +1203,30 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1186 sizeof(vf_rss_query_en), 1203 sizeof(vf_rss_query_en),
1187 &vf_rss_query_en)) 1204 &vf_rss_query_en))
1188 goto nla_put_failure; 1205 goto nla_put_failure;
1206 memset(&vf_stats, 0, sizeof(vf_stats));
1207 if (dev->netdev_ops->ndo_get_vf_stats)
1208 dev->netdev_ops->ndo_get_vf_stats(dev, i,
1209 &vf_stats);
1210 vfstats = nla_nest_start(skb, IFLA_VF_STATS);
1211 if (!vfstats) {
1212 nla_nest_cancel(skb, vf);
1213 nla_nest_cancel(skb, vfinfo);
1214 goto nla_put_failure;
1215 }
1216 if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
1217 vf_stats.rx_packets) ||
1218 nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
1219 vf_stats.tx_packets) ||
1220 nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
1221 vf_stats.rx_bytes) ||
1222 nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
1223 vf_stats.tx_bytes) ||
1224 nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
1225 vf_stats.broadcast) ||
1226 nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
1227 vf_stats.multicast))
1228 goto nla_put_failure;
1229 nla_nest_end(skb, vfstats);
1189 nla_nest_end(skb, vf); 1230 nla_nest_end(skb, vf);
1190 } 1231 }
1191 nla_nest_end(skb, vfinfo); 1232 nla_nest_end(skb, vfinfo);
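A hedged driver-side sketch of the new callback consumed above (all my_* names are hypothetical; a real driver would read hardware or per-VF software counters). rtnl_fill_ifinfo() zeroes the struct before the call, so a driver only needs to fill what it actually tracks:

	static int my_ndo_get_vf_stats(struct net_device *dev, int vf,
				       struct ifla_vf_stats *vf_stats)
	{
		struct my_priv *priv = netdev_priv(dev);	/* hypothetical */
		struct my_vf_counters *c = &priv->vf_cnt[vf];	/* hypothetical */

		vf_stats->rx_packets = c->rx_packets;
		vf_stats->tx_packets = c->tx_packets;
		vf_stats->rx_bytes   = c->rx_bytes;
		vf_stats->tx_bytes   = c->tx_bytes;
		vf_stats->broadcast  = c->broadcast;
		vf_stats->multicast  = c->multicast;
		return 0;
	}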
@@ -1204,7 +1245,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
 
 		if (!net_eq(dev_net(dev), link_net)) {
-			int id = peernet2id(dev_net(dev), link_net);
+			int id = peernet2id_alloc(dev_net(dev), link_net);
 
 			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
 				goto nla_put_failure;
@@ -1287,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_SLAVE_DATA]	= { .type = NLA_NESTED },
 };
 
-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
-	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
-};
-
 static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_MAC]		= { .len = sizeof(struct ifla_vf_mac) },
 	[IFLA_VF_VLAN]		= { .len = sizeof(struct ifla_vf_vlan) },
@@ -1299,6 +1336,16 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_RATE]		= { .len = sizeof(struct ifla_vf_rate) },
 	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
 	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
+	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
+	[IFLA_VF_STATS_RX_PACKETS]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_TX_PACKETS]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_RX_BYTES]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_TX_BYTES]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_BROADCAST]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_MULTICAST]	= { .type = NLA_U64 },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1437,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 	return 0;
 }
 
-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 {
-	int rem, err = -EINVAL;
-	struct nlattr *vf;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int err = -EINVAL;
 
-	nla_for_each_nested(vf, attr, rem) {
-		switch (nla_type(vf)) {
-		case IFLA_VF_MAC: {
-			struct ifla_vf_mac *ivm;
-			ivm = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_mac)
-				err = ops->ndo_set_vf_mac(dev, ivm->vf,
-							  ivm->mac);
-			break;
-		}
-		case IFLA_VF_VLAN: {
-			struct ifla_vf_vlan *ivv;
-			ivv = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_vlan)
-				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-							   ivv->vlan,
-							   ivv->qos);
-			break;
-		}
-		case IFLA_VF_TX_RATE: {
-			struct ifla_vf_tx_rate *ivt;
-			struct ifla_vf_info ivf;
-			ivt = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_get_vf_config)
-				err = ops->ndo_get_vf_config(dev, ivt->vf,
-							     &ivf);
-			if (err)
-				break;
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_rate)
-				err = ops->ndo_set_vf_rate(dev, ivt->vf,
-							   ivf.min_tx_rate,
-							   ivt->rate);
-			break;
-		}
-		case IFLA_VF_RATE: {
-			struct ifla_vf_rate *ivt;
-			ivt = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_rate)
-				err = ops->ndo_set_vf_rate(dev, ivt->vf,
-							   ivt->min_tx_rate,
-							   ivt->max_tx_rate);
-			break;
-		}
-		case IFLA_VF_SPOOFCHK: {
-			struct ifla_vf_spoofchk *ivs;
-			ivs = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_spoofchk)
-				err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
-							       ivs->setting);
-			break;
-		}
-		case IFLA_VF_LINK_STATE: {
-			struct ifla_vf_link_state *ivl;
-			ivl = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_link_state)
-				err = ops->ndo_set_vf_link_state(dev, ivl->vf,
-								 ivl->link_state);
-			break;
-		}
-		case IFLA_VF_RSS_QUERY_EN: {
-			struct ifla_vf_rss_query_en *ivrssq_en;
+	if (tb[IFLA_VF_MAC]) {
+		struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
 
-			ivrssq_en = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_rss_query_en)
-				err = ops->ndo_set_vf_rss_query_en(dev,
-							    ivrssq_en->vf,
-							    ivrssq_en->setting);
-			break;
-		}
-		default:
-			err = -EINVAL;
-			break;
-		}
-		if (err)
-			break;
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_mac)
+			err = ops->ndo_set_vf_mac(dev, ivm->vf,
+						  ivm->mac);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_VLAN]) {
+		struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_vlan)
+			err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
+						   ivv->qos);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_TX_RATE]) {
+		struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
+		struct ifla_vf_info ivf;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_get_vf_config)
+			err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
+		if (err < 0)
+			return err;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_rate)
+			err = ops->ndo_set_vf_rate(dev, ivt->vf,
+						   ivf.min_tx_rate,
+						   ivt->rate);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_RATE]) {
+		struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_rate)
+			err = ops->ndo_set_vf_rate(dev, ivt->vf,
+						   ivt->min_tx_rate,
+						   ivt->max_tx_rate);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_SPOOFCHK]) {
+		struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_spoofchk)
+			err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+						       ivs->setting);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_LINK_STATE]) {
+		struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_link_state)
+			err = ops->ndo_set_vf_link_state(dev, ivl->vf,
+							 ivl->link_state);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_RSS_QUERY_EN]) {
+		struct ifla_vf_rss_query_en *ivrssq_en;
+
+		err = -EOPNOTSUPP;
+		ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+		if (ops->ndo_set_vf_rss_query_en)
+			err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
+							   ivrssq_en->setting);
+		if (err < 0)
+			return err;
 	}
+
 	return err;
 }
 
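do_setvfinfo() now receives a fully parsed, policy-checked attribute table instead of walking the nest itself. The wire layout it consumes is unchanged; an illustrative sketch of that nesting and of the payload struct user space packs (the comment layout is a summary, not a literal dump):

/* RTM_SETLINK payload layout consumed above (illustrative only):
 *
 * IFLA_VFINFO_LIST (nested)
 *   IFLA_VF_INFO (nested, one per VF being configured)
 *     IFLA_VF_MAC  -> struct ifla_vf_mac  { __u32 vf; __u8 mac[32]; }
 *     IFLA_VF_VLAN -> struct ifla_vf_vlan { __u32 vf; __u32 vlan; __u32 qos; }
 *     ...
 */
struct ifla_vf_mac ivm = {
	.vf  = 0,
	.mac = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
};
/* user space nla_put()s this inside IFLA_VF_INFO inside IFLA_VFINFO_LIST;
 * iproute2's "ip link set ... vf 0 mac ..." does exactly this packing. */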
@@ -1722,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *vfinfo[IFLA_VF_MAX + 1];
 		struct nlattr *attr;
 		int rem;
+
 		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
-			if (nla_type(attr) != IFLA_VF_INFO) {
+			if (nla_type(attr) != IFLA_VF_INFO ||
+			    nla_len(attr) < NLA_HDRLEN) {
 				err = -EINVAL;
 				goto errout;
 			}
-			err = do_setvfinfo(dev, attr);
+			err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
+					       ifla_vf_policy);
+			if (err < 0)
+				goto errout;
+			err = do_setvfinfo(dev, vfinfo);
 			if (err < 0)
 				goto errout;
 			status |= DO_SETLINK_NOTIFY;
@@ -2857,7 +2913,11 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
 
 int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 			    struct net_device *dev, u16 mode,
-			    u32 flags, u32 mask, int nlflags)
+			    u32 flags, u32 mask, int nlflags,
+			    u32 filter_mask,
+			    int (*vlan_fill)(struct sk_buff *skb,
+					     struct net_device *dev,
+					     u32 filter_mask))
 {
 	struct nlmsghdr *nlh;
 	struct ifinfomsg *ifm;
@@ -2865,6 +2925,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	struct nlattr *protinfo;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+	int err = 0;
 
 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
 	if (nlh == NULL)
@@ -2905,6 +2966,13 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 			goto nla_put_failure;
 		}
 	}
+	if (vlan_fill) {
+		err = vlan_fill(skb, dev, filter_mask);
+		if (err) {
+			nla_nest_cancel(skb, br_afspec);
+			goto nla_put_failure;
+		}
+	}
 	nla_nest_end(skb, br_afspec);
 
 	protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
@@ -2938,9 +3006,9 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	return 0;
 nla_put_failure:
 	nlmsg_cancel(skb, nlh);
-	return -EMSGSIZE;
+	return err ? err : -EMSGSIZE;
 }
-EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
+EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink);
 
 static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 {
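The new vlan_fill callback lets a caller append per-VLAN attributes inside the IFLA_AF_SPEC nest that ndo_dflt_bridge_getlink() already opens. A hedged sketch of such a callback (drivers pass their own; my_get_vid() below is hypothetical):

/* Hedged sketch of a vlan_fill callback. */
static int my_vlan_fill(struct sk_buff *skb, struct net_device *dev,
			u32 filter_mask)
{
	struct bridge_vlan_info vinfo = {
		.flags = BRIDGE_VLAN_INFO_UNTAGGED,
		.vid   = my_get_vid(dev),	/* hypothetical helper */
	};

	if (!(filter_mask & RTEXT_FILTER_BRVLAN))
		return 0;

	/* appended inside the IFLA_AF_SPEC nest opened by the caller */
	if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo))
		return -EMSGSIZE;
	return 0;
}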
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 51dd3193a33e..fd3ce461fbe6 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -154,7 +154,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
154 net_secret_init(); 154 net_secret_init();
155 memcpy(hash, saddr, 16); 155 memcpy(hash, saddr, 16);
156 for (i = 0; i < 4; i++) 156 for (i = 0; i < 4; i++)
157 secret[i] = net_secret[i] + daddr[i]; 157 secret[i] = net_secret[i] + (__force u32)daddr[i];
158 secret[4] = net_secret[4] + 158 secret[4] = net_secret[4] +
159 (((__force u16)sport << 16) + (__force u16)dport); 159 (((__force u16)sport << 16) + (__force u16)dport);
160 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) 160 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
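The (__force u32) cast only affects sparse: daddr is __be32, and adding it to a host-order u32 mixes byte orders as far as the checker is concerned. A two-line illustration of the idiom (no generated code changes):

#include <linux/types.h>

__be32 be = (__force __be32)0x01020304;	/* bless a raw value as big-endian */
u32 raw   = (__force u32)be;		/* strip the endianness tag; only the
					 * sparse warning goes away */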
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 41ec02242ea7..b6a19ca0f99e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
347} 347}
348EXPORT_SYMBOL(build_skb); 348EXPORT_SYMBOL(build_skb);
349 349
350struct netdev_alloc_cache { 350static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
351 struct page_frag frag; 351static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
352 /* we maintain a pagecount bias, so that we dont dirty cache line
353 * containing page->_count every time we allocate a fragment.
354 */
355 unsigned int pagecnt_bias;
356};
357static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
358static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
359
360static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
361 gfp_t gfp_mask)
362{
363 const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
364 struct page *page = NULL;
365 gfp_t gfp = gfp_mask;
366
367 if (order) {
368 gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
369 __GFP_NOMEMALLOC;
370 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
371 nc->frag.size = PAGE_SIZE << (page ? order : 0);
372 }
373
374 if (unlikely(!page))
375 page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
376
377 nc->frag.page = page;
378
379 return page;
380}
381
382static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
383 unsigned int fragsz, gfp_t gfp_mask)
384{
385 struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
386 struct page *page = nc->frag.page;
387 unsigned int size;
388 int offset;
389
390 if (unlikely(!page)) {
391refill:
392 page = __page_frag_refill(nc, gfp_mask);
393 if (!page)
394 return NULL;
395
396 /* if size can vary use frag.size else just use PAGE_SIZE */
397 size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
398
399 /* Even if we own the page, we do not use atomic_set().
400 * This would break get_page_unless_zero() users.
401 */
402 atomic_add(size - 1, &page->_count);
403
404 /* reset page count bias and offset to start of new frag */
405 nc->pagecnt_bias = size;
406 nc->frag.offset = size;
407 }
408
409 offset = nc->frag.offset - fragsz;
410 if (unlikely(offset < 0)) {
411 if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
412 goto refill;
413
414 /* if size can vary use frag.size else just use PAGE_SIZE */
415 size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
416
417 /* OK, page count is 0, we can safely set it */
418 atomic_set(&page->_count, size);
419
420 /* reset page count bias and offset to start of new frag */
421 nc->pagecnt_bias = size;
422 offset = size - fragsz;
423 }
424
425 nc->pagecnt_bias--;
426 nc->frag.offset = offset;
427
428 return page_address(page) + offset;
429}
430 352
431static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 353static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
432{ 354{
355 struct page_frag_cache *nc;
433 unsigned long flags; 356 unsigned long flags;
434 void *data; 357 void *data;
435 358
436 local_irq_save(flags); 359 local_irq_save(flags);
437 data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask); 360 nc = this_cpu_ptr(&netdev_alloc_cache);
361 data = __alloc_page_frag(nc, fragsz, gfp_mask);
438 local_irq_restore(flags); 362 local_irq_restore(flags);
439 return data; 363 return data;
440} 364}
@@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
454 378
455static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 379static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
456{ 380{
457 return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask); 381 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
382
383 return __alloc_page_frag(nc, fragsz, gfp_mask);
458} 384}
459 385
460void *napi_alloc_frag(unsigned int fragsz) 386void *napi_alloc_frag(unsigned int fragsz)
@@ -464,76 +390,70 @@ void *napi_alloc_frag(unsigned int fragsz)
 EXPORT_SYMBOL(napi_alloc_frag);
 
 /**
- *	__alloc_rx_skb - allocate an skbuff for rx
+ *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ *	@dev: network device to receive on
  *	@length: length to allocate
  *	@gfp_mask: get_free_pages mask, passed to alloc_skb
- *	@flags:	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- *		allocations in case we have to fallback to __alloc_skb()
- *		If SKB_ALLOC_NAPI is set, page fragment will be allocated
- *		from napi_cache instead of netdev_cache.
  *
  *	Allocate a new &sk_buff and assign it a usage count of one. The
- *	buffer has unspecified headroom built in. Users should allocate
+ *	buffer has NET_SKB_PAD headroom built in. Users should allocate
  *	the headroom they think they need without accounting for the
  *	built in space. The built in space is used for optimisations.
  *
  *	%NULL is returned if there is no free memory.
  */
-static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
-				      int flags)
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+				   gfp_t gfp_mask)
 {
-	struct sk_buff *skb = NULL;
-	unsigned int fragsz = SKB_DATA_ALIGN(length) +
-			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct page_frag_cache *nc;
+	unsigned long flags;
+	struct sk_buff *skb;
+	bool pfmemalloc;
+	void *data;
 
-	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data;
+	len += NET_SKB_PAD;
 
-		if (sk_memalloc_socks())
-			gfp_mask |= __GFP_MEMALLOC;
+	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+		if (!skb)
+			goto skb_fail;
+		goto skb_success;
+	}
 
-		data = (flags & SKB_ALLOC_NAPI) ?
-			__napi_alloc_frag(fragsz, gfp_mask) :
-			__netdev_alloc_frag(fragsz, gfp_mask);
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	len = SKB_DATA_ALIGN(len);
 
-		if (likely(data)) {
-			skb = build_skb(data, fragsz);
-			if (unlikely(!skb))
-				put_page(virt_to_head_page(data));
-		}
-	} else {
-		skb = __alloc_skb(length, gfp_mask,
-				  SKB_ALLOC_RX, NUMA_NO_NODE);
-	}
-	return skb;
-}
+	if (sk_memalloc_socks())
+		gfp_mask |= __GFP_MEMALLOC;
 
-/**
- *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
- *	@dev: network device to receive on
- *	@length: length to allocate
- *	@gfp_mask: get_free_pages mask, passed to alloc_skb
- *
- *	Allocate a new &sk_buff and assign it a usage count of one. The
- *	buffer has NET_SKB_PAD headroom built in. Users should allocate
- *	the headroom they think they need without accounting for the
- *	built in space. The built in space is used for optimisations.
- *
- *	%NULL is returned if there is no free memory.
- */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-				   unsigned int length, gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
+	local_irq_save(flags);
 
-	length += NET_SKB_PAD;
-	skb = __alloc_rx_skb(length, gfp_mask, 0);
+	nc = this_cpu_ptr(&netdev_alloc_cache);
+	data = __alloc_page_frag(nc, len, gfp_mask);
+	pfmemalloc = nc->pfmemalloc;
 
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD);
-		skb->dev = dev;
+	local_irq_restore(flags);
+
+	if (unlikely(!data))
+		return NULL;
+
+	skb = __build_skb(data, len);
+	if (unlikely(!skb)) {
+		skb_free_frag(data);
+		return NULL;
 	}
 
+	/* use OR instead of assignment to avoid clearing of bits in mask */
+	if (pfmemalloc)
+		skb->pfmemalloc = 1;
+	skb->head_frag = 1;
+
+skb_success:
+	skb_reserve(skb, NET_SKB_PAD);
+	skb->dev = dev;
+
+skb_fail:
 	return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
@@ -551,19 +471,49 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
  *
  *	%NULL is returned if there is no free memory.
  */
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
-				 unsigned int length, gfp_t gfp_mask)
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
+				 gfp_t gfp_mask)
 {
+	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
+	void *data;
 
-	length += NET_SKB_PAD + NET_IP_ALIGN;
-	skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+	len += NET_SKB_PAD + NET_IP_ALIGN;
 
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-		skb->dev = napi->dev;
+	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+		if (!skb)
+			goto skb_fail;
+		goto skb_success;
 	}
 
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	len = SKB_DATA_ALIGN(len);
+
+	if (sk_memalloc_socks())
+		gfp_mask |= __GFP_MEMALLOC;
+
+	data = __alloc_page_frag(nc, len, gfp_mask);
+	if (unlikely(!data))
+		return NULL;
+
+	skb = __build_skb(data, len);
+	if (unlikely(!skb)) {
+		skb_free_frag(data);
+		return NULL;
+	}
+
+	/* use OR instead of assignment to avoid clearing of bits in mask */
+	if (nc->pfmemalloc)
+		skb->pfmemalloc = 1;
+	skb->head_frag = 1;
+
+skb_success:
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+	skb->dev = napi->dev;
+
+skb_fail:
 	return skb;
 }
 EXPORT_SYMBOL(__napi_alloc_skb);
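Nothing changes for drivers; a hedged sketch of the typical call site inside a NAPI poll loop (my_ring, my_rx_pending and friends are hypothetical driver names; napi_alloc_skb() wraps __napi_alloc_skb() with GFP_ATOMIC):

/* Hedged sketch: RX path allocating skbs from the per-CPU NAPI cache. */
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_ring *ring = container_of(napi, struct my_ring, napi);
	int done = 0;

	while (done < budget && my_rx_pending(ring)) {
		unsigned int len = my_rx_len(ring);
		struct sk_buff *skb = napi_alloc_skb(napi, len);

		if (!skb)
			break;	/* out of memory, retry on the next poll */
		my_rx_copy(ring, skb_put(skb, len));
		skb->protocol = eth_type_trans(skb, napi->dev);
		napi_gro_receive(napi, skb);
		done++;
	}
	if (done < budget)
		napi_complete(napi);
	return done;
}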
@@ -611,10 +561,12 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 
 static void skb_free_head(struct sk_buff *skb)
 {
+	unsigned char *head = skb->head;
+
 	if (skb->head_frag)
-		put_page(virt_to_head_page(skb->head));
+		skb_free_frag(head);
 	else
-		kfree(skb->head);
+		kfree(head);
 }
 
 static void skb_release_data(struct sk_buff *skb)
@@ -1918,15 +1870,39 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	return false;
 }
 
+ssize_t skb_socket_splice(struct sock *sk,
+			  struct pipe_inode_info *pipe,
+			  struct splice_pipe_desc *spd)
+{
+	int ret;
+
+	/* Drop the socket lock, otherwise we have reverse
+	 * locking dependencies between sk_lock and i_mutex
+	 * here as compared to sendfile(). We enter here
+	 * with the socket lock held, and splice_to_pipe() will
+	 * grab the pipe inode lock. For sendfile() emulation,
+	 * we call into ->sendpage() with the i_mutex lock held
+	 * and networking will grab the socket lock.
+	 */
+	release_sock(sk);
+	ret = splice_to_pipe(pipe, spd);
+	lock_sock(sk);
+
+	return ret;
+}
+
 /*
  * Map data from the skb to a pipe. Should handle both the linear part,
  * the fragments, and the frag list. It does NOT handle frag lists within
  * the frag list, if such a thing exists. We'd probably need to recurse to
  * handle that cleanly.
  */
-int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
-		    unsigned int flags)
+		    unsigned int flags,
+		    ssize_t (*splice_cb)(struct sock *,
+					 struct pipe_inode_info *,
+					 struct splice_pipe_desc *))
 {
 	struct partial_page partial[MAX_SKB_FRAGS];
 	struct page *pages[MAX_SKB_FRAGS];
@@ -1939,7 +1915,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		.spd_release = sock_spd_release,
 	};
 	struct sk_buff *frag_iter;
-	struct sock *sk = skb->sk;
 	int ret = 0;
 
 	/*
@@ -1962,23 +1937,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	}
 
 done:
-	if (spd.nr_pages) {
-		/*
-		 * Drop the socket lock, otherwise we have reverse
-		 * locking dependencies between sk_lock and i_mutex
-		 * here as compared to sendfile(). We enter here
-		 * with the socket lock held, and splice_to_pipe() will
-		 * grab the pipe inode lock. For sendfile() emulation,
-		 * we call into ->sendpage() with the i_mutex lock held
-		 * and networking will grab the socket lock.
-		 */
-		release_sock(sk);
-		ret = splice_to_pipe(pipe, &spd);
-		lock_sock(sk);
-	}
+	if (spd.nr_pages)
+		ret = splice_cb(sk, pipe, &spd);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(skb_splice_bits);
 
 /**
  * skb_store_bits - store bits from kernel buffer to skb
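Callers now pass the socket and a splice callback explicitly: ordinary socket protocols hand in the skb_socket_splice() helper added above, while callers that must drop a different lock (the AF_UNIX stream splice support this refactor enables) supply their own. A hedged sketch of a TCP-style actor using the new signature:

/* Hedged sketch: a splice actor calling the reworked skb_splice_bits(). */
static int my_splice_actor(struct sk_buff *skb, struct sock *sk,
			   unsigned int offset, unsigned int len,
			   struct pipe_inode_info *pipe, unsigned int flags)
{
	return skb_splice_bits(skb, sk, offset, pipe, len, flags,
			       skb_socket_splice);
}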
@@ -2963,6 +2927,24 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_append_datato_frags);
 
+int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
+			 int offset, size_t size)
+{
+	int i = skb_shinfo(skb)->nr_frags;
+
+	if (skb_can_coalesce(skb, i, page, offset)) {
+		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
+	} else if (i < MAX_SKB_FRAGS) {
+		get_page(page);
+		skb_fill_page_desc(skb, i, page, offset, size);
+	} else {
+		return -EMSGSIZE;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_append_pagefrags);
+
 /**
  * skb_pull_rcsum - pull skb and update receive checksum
  * @skb: buffer to update
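A hedged usage sketch for skb_append_pagefrags(): attach a page to an skb under construction from a sendpage-style path, with the caller doing the length accounting (the helper itself only touches the frag array; -EMSGSIZE means the frag array is full and the skb should be flushed first):

/* Hedged sketch; error handling condensed. */
static int my_sendpage_frag(struct sock *sk, struct sk_buff *skb,
			    struct page *page, int offset, size_t size)
{
	int err = skb_append_pagefrags(skb, page, offset, size);

	if (err)
		return err;	/* -EMSGSIZE: MAX_SKB_FRAGS reached */

	skb->len += size;	/* caller accounts the new payload */
	skb->data_len += size;
	skb->truesize += size;
	sk->sk_wmem_queued += size;
	return 0;
}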
@@ -4030,6 +4012,93 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
 }
 EXPORT_SYMBOL(skb_checksum_setup);
 
+/**
+ * skb_checksum_maybe_trim - maybe trims the given skb
+ * @skb: the skb to check
+ * @transport_len: the data length beyond the network header
+ *
+ * Checks whether the given skb has data beyond the given transport length.
+ * If so, returns a cloned skb trimmed to this transport length.
+ * Otherwise returns the provided skb. Returns NULL in error cases
+ * (e.g. transport_len exceeds skb length or out-of-memory).
+ *
+ * Caller needs to set the skb transport header and release the returned skb.
+ * Provided skb is consumed.
+ */
+static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
+					       unsigned int transport_len)
+{
+	struct sk_buff *skb_chk;
+	unsigned int len = skb_transport_offset(skb) + transport_len;
+	int ret;
+
+	if (skb->len < len) {
+		kfree_skb(skb);
+		return NULL;
+	} else if (skb->len == len) {
+		return skb;
+	}
+
+	skb_chk = skb_clone(skb, GFP_ATOMIC);
+	kfree_skb(skb);
+
+	if (!skb_chk)
+		return NULL;
+
+	ret = pskb_trim_rcsum(skb_chk, len);
+	if (ret) {
+		kfree_skb(skb_chk);
+		return NULL;
+	}
+
+	return skb_chk;
+}
+
+/**
+ * skb_checksum_trimmed - validate checksum of an skb
+ * @skb: the skb to check
+ * @transport_len: the data length beyond the network header
+ * @skb_chkf: checksum function to use
+ *
+ * Applies the given checksum function skb_chkf to the provided skb.
+ * Returns a checked and maybe trimmed skb. Returns NULL on error.
+ *
+ * If the skb has data beyond the given transport length, then a
+ * trimmed & cloned skb is checked and returned.
+ *
+ * Caller needs to set the skb transport header and release the returned skb.
+ * Provided skb is consumed.
+ */
+struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
+				     unsigned int transport_len,
+				     __sum16(*skb_chkf)(struct sk_buff *skb))
+{
+	struct sk_buff *skb_chk;
+	unsigned int offset = skb_transport_offset(skb);
+	__sum16 ret;
+
+	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
+	if (!skb_chk)
+		return NULL;
+
+	if (!pskb_may_pull(skb_chk, offset)) {
+		kfree_skb(skb_chk);
+		return NULL;
+	}
+
+	__skb_pull(skb_chk, offset);
+	ret = skb_chkf(skb_chk);
+	__skb_push(skb_chk, offset);
+
+	if (ret) {
+		kfree_skb(skb_chk);
+		return NULL;
+	}
+
+	return skb_chk;
+}
+EXPORT_SYMBOL(skb_checksum_trimmed);
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb)
 {
 	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
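skb_checksum_trimmed() exists for multicast snooping: IGMP/MLD messages must be checksum-verified over exactly the transport length before a bridge acts on them. A hedged sketch of the calling pattern (the IGMP/MLD check helpers added in the same series are the intended users; names below are illustrative):

/* Hedged sketch: validate an IGMP message of igmp_len transport bytes. */
static __sum16 my_igmp_checksum(struct sk_buff *skb)
{
	/* invoked with skb->data already pulled to the transport header,
	 * so skb->len covers exactly the transport payload */
	return csum_fold(skb_checksum(skb, 0, skb->len, 0));
}

static int my_check_igmp(struct sk_buff **skbp, unsigned int igmp_len)
{
	struct sk_buff *skb_chk;

	/* consumes *skbp; returns a (possibly cloned and trimmed) skb */
	skb_chk = skb_checksum_trimmed(*skbp, igmp_len, my_igmp_checksum);
	if (!skb_chk)
		return -EINVAL;
	*skbp = skb_chk;
	return 0;
}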
diff --git a/net/core/sock.c b/net/core/sock.c
index dc30dc5bb1b8..08f16db46070 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -131,6 +131,7 @@
 #include <linux/ipsec.h>
 #include <net/cls_cgroup.h>
 #include <net/netprio_cgroup.h>
+#include <linux/sock_diag.h>
 
 #include <linux/filter.h>
 
@@ -1393,9 +1394,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx);
  *	@family: protocol family
  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
  *	@prot: struct proto associated with this new sock instance
+ *	@kern: is this to be a kernel socket?
  */
 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
-		      struct proto *prot)
+		      struct proto *prot, int kern)
 {
 	struct sock *sk;
 
@@ -1408,7 +1410,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		 */
 		sk->sk_prot = sk->sk_prot_creator = prot;
 		sock_lock_init(sk);
-		sock_net_set(sk, get_net(net));
+		sk->sk_net_refcnt = kern ? 0 : 1;
+		if (likely(sk->sk_net_refcnt))
+			get_net(net);
+		sock_net_set(sk, net);
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
@@ -1419,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 }
 EXPORT_SYMBOL(sk_alloc);
 
-static void __sk_free(struct sock *sk)
+void sk_destruct(struct sock *sk)
 {
 	struct sk_filter *filter;
 
@@ -1442,10 +1447,19 @@ static void __sk_free(struct sock *sk)
 	if (sk->sk_peer_cred)
 		put_cred(sk->sk_peer_cred);
 	put_pid(sk->sk_peer_pid);
-	put_net(sock_net(sk));
+	if (likely(sk->sk_net_refcnt))
+		put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
 
+static void __sk_free(struct sock *sk)
+{
+	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
+		sock_diag_broadcast_destroy(sk);
+	else
+		sk_destruct(sk);
+}
+
 void sk_free(struct sock *sk)
 {
 	/*
@@ -1458,25 +1472,6 @@ void sk_free(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_free);
 
-/*
- * Last sock_put should drop reference to sk->sk_net. It has already
- * been dropped in sk_change_net. Taking reference to stopping namespace
- * is not an option.
- * Take reference to a socket to remove it from hash _alive_ and after that
- * destroy it in the context of init_net.
- */
-void sk_release_kernel(struct sock *sk)
-{
-	if (sk == NULL || sk->sk_socket == NULL)
-		return;
-
-	sock_hold(sk);
-	sock_release(sk->sk_socket);
-	sock_net_set(sk, get_net(&init_net));
-	sock_put(sk);
-}
-EXPORT_SYMBOL(sk_release_kernel);
-
 static void sk_update_clone(const struct sock *sk, struct sock *newsk)
 {
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
@@ -1592,6 +1587,8 @@ EXPORT_SYMBOL_GPL(sk_clone_lock);
 
 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
+	u32 max_segs = 1;
+
 	__sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
@@ -1603,9 +1600,10 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 		} else {
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 			sk->sk_gso_max_size = dst->dev->gso_max_size;
-			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
+			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
 		}
 	}
+	sk->sk_gso_max_segs = max_segs;
 }
 EXPORT_SYMBOL_GPL(sk_setup_caps);
 
@@ -2080,12 +2078,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 /**
  *	__sk_mem_reclaim - reclaim memory_allocated
  *	@sk: socket
+ *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
  */
-void __sk_mem_reclaim(struct sock *sk)
+void __sk_mem_reclaim(struct sock *sk, int amount)
 {
-	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
-	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+	amount >>= SK_MEM_QUANTUM_SHIFT;
+	sk_memory_allocated_sub(sk, amount);
+	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
@@ -2270,7 +2269,6 @@ static void sock_def_write_space(struct sock *sk)
 
 static void sock_def_destruct(struct sock *sk)
 {
-	kfree(sk->sk_protinfo);
 }
 
 void sk_send_sigurg(struct sock *sk)
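The kern flag is what lets per-namespace kernel sockets stop pinning their namespace: with sk_net_refcnt == 0 the socket neither takes nor drops a netns reference, so the pernet exit hook can release it while the namespace goes away (which is also why sk_release_kernel() above becomes unnecessary). A hedged sketch of the creation side, assuming sock_create_kern() with the struct net argument it gained in this same series:

/* Hedged sketch: a pernet subsystem creating a kernel socket that
 * must not hold its namespace alive; my_net() is a hypothetical
 * per-net state accessor.
 */
static int my_pernet_init(struct net *net)
{
	struct socket *sock;
	int err;

	/* kern=1 path: sk_alloc() leaves sk_net_refcnt at 0, so no
	 * get_net(); the pernet exit hook releases the socket instead. */
	err = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (err < 0)
		return err;

	my_net(net)->sock = sock;
	return 0;
}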
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 74dddf84adcd..d79866c5f8bc 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -5,6 +5,9 @@
 #include <net/net_namespace.h>
 #include <linux/module.h>
 #include <net/sock.h>
+#include <linux/kernel.h>
+#include <linux/tcp.h>
+#include <linux/workqueue.h>
 
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
@@ -12,6 +15,7 @@
 static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
+static struct workqueue_struct *broadcast_wq;
 
 static u64 sock_gen_cookie(struct sock *sk)
 {
@@ -101,6 +105,62 @@ out:
 }
 EXPORT_SYMBOL(sock_diag_put_filterinfo);
 
+struct broadcast_sk {
+	struct sock *sk;
+	struct work_struct work;
+};
+
+static size_t sock_diag_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+	       + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
+	       + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
+}
+
+static void sock_diag_broadcast_destroy_work(struct work_struct *work)
+{
+	struct broadcast_sk *bsk =
+		container_of(work, struct broadcast_sk, work);
+	struct sock *sk = bsk->sk;
+	const struct sock_diag_handler *hndl;
+	struct sk_buff *skb;
+	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
+	int err = -1;
+
+	WARN_ON(group == SKNLGRP_NONE);
+
+	skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
+	if (!skb)
+		goto out;
+
+	mutex_lock(&sock_diag_table_mutex);
+	hndl = sock_diag_handlers[sk->sk_family];
+	if (hndl && hndl->get_info)
+		err = hndl->get_info(skb, sk);
+	mutex_unlock(&sock_diag_table_mutex);
+
+	if (!err)
+		nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
+				GFP_KERNEL);
+	else
+		kfree_skb(skb);
+out:
+	sk_destruct(sk);
+	kfree(bsk);
+}
+
+void sock_diag_broadcast_destroy(struct sock *sk)
+{
+	/* Note, this function is often called from an interrupt context. */
+	struct broadcast_sk *bsk =
+		kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);
+
+	if (!bsk)
+		return sk_destruct(sk);
+	bsk->sk = sk;
+	INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
+	queue_work(broadcast_wq, &bsk->work);
+}
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
@@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb)
 	mutex_unlock(&sock_diag_mutex);
 }
 
+static int sock_diag_bind(struct net *net, int group)
+{
+	switch (group) {
+	case SKNLGRP_INET_TCP_DESTROY:
+	case SKNLGRP_INET_UDP_DESTROY:
+		if (!sock_diag_handlers[AF_INET])
+			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+				       NETLINK_SOCK_DIAG, AF_INET);
+		break;
+	case SKNLGRP_INET6_TCP_DESTROY:
+	case SKNLGRP_INET6_UDP_DESTROY:
+		if (!sock_diag_handlers[AF_INET6])
+			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+				       NETLINK_SOCK_DIAG, AF_INET6);
+		break;
+	}
+	return 0;
+}
+
 static int __net_init diag_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
+		.groups	= SKNLGRP_MAX,
 		.input	= sock_diag_rcv,
+		.bind	= sock_diag_bind,
+		.flags	= NL_CFG_F_NONROOT_RECV,
 	};
 
 	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
@@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = {
 
 static int __init sock_diag_init(void)
 {
+	broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
+	BUG_ON(!broadcast_wq);
 	return register_pernet_subsys(&diag_net_ops);
 }
 
 static void __exit sock_diag_exit(void)
 {
 	unregister_pernet_subsys(&diag_net_ops);
+	destroy_workqueue(broadcast_wq);
 }
 
 module_init(sock_diag_init);
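User space consumes these destroy notifications by joining the new multicast groups on a NETLINK_SOCK_DIAG socket; thanks to NL_CFG_F_NONROOT_RECV this needs no privileges. A hedged sketch (the group constant is exported in the uapi sock_diag header; the SOL_NETLINK fallback define covers libcs that do not expose it):

/* Hedged sketch (userspace): subscribe to TCP destroy events. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
	int grp = SKNLGRP_INET_TCP_DESTROY;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	char buf[8192];

	if (fd < 0 || setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
				 &grp, sizeof(grp)) < 0) {
		perror("sock_diag");
		return 1;
	}
	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);

		if (n <= 0)
			break;
		/* each message is an inet_diag_msg built by get_info() */
		printf("got %zd bytes of destroy notification\n", n);
	}
	return 0;
}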
diff --git a/net/core/stream.c b/net/core/stream.c
index 301c05f26060..d70f77a0c889 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -119,6 +119,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 	int err = 0;
 	long vm_wait = 0;
 	long current_timeo = *timeo_p;
+	bool noblock = (*timeo_p ? false : true);
 	DEFINE_WAIT(wait);
 
 	if (sk_stream_memory_free(sk))
@@ -131,8 +132,11 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 
 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 			goto do_error;
-		if (!*timeo_p)
+		if (!*timeo_p) {
+			if (noblock)
+				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 			goto do_nonblock;
+		}
 		if (signal_pending(current))
 			goto do_interrupted;
 		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
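This fix matters for nonblocking writers: without SOCK_NOSPACE set, the write-space wakeup never arms, and a poll()-driven sender can stall after one EAGAIN. A hedged userspace sketch of the loop this makes reliable:

/* Hedged sketch (userspace): nonblocking send loop relying on POLLOUT. */
#include <errno.h>
#include <poll.h>
#include <sys/socket.h>

ssize_t send_all_nonblock(int fd, const char *p, size_t len)
{
	size_t off = 0;

	while (off < len) {
		ssize_t n = send(fd, p + off, len - off, MSG_DONTWAIT);

		if (n > 0) {
			off += n;
			continue;
		}
		if (n < 0 && errno == EAGAIN) {
			/* the kernel set SOCK_NOSPACE above, so this
			 * poll() is woken when send memory frees up */
			struct pollfd pfd = { .fd = fd, .events = POLLOUT };

			if (poll(&pfd, 1, -1) < 0)
				return -1;
			continue;
		}
		return -1;
	}
	return off;
}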
diff --git a/net/core/utils.c b/net/core/utils.c
index 7b803884c162..a7732a068043 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -304,13 +304,15 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 			      __be32 from, __be32 to, int pseudohdr)
 {
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		*sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from),
-					  to));
+		csum_replace4(sum, from, to);
 		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
-			skb->csum = ~csum_add(csum_sub(~(skb->csum), from), to);
+			skb->csum = ~csum_add(csum_sub(~(skb->csum),
+						       (__force __wsum)from),
+					      (__force __wsum)to);
 	} else if (pseudohdr)
-		*sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum), from),
-					   to));
+		*sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum),
+						    (__force __wsum)from),
+					   (__force __wsum)to));
 }
 EXPORT_SYMBOL(inet_proto_csum_replace4);
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 5a45f8de5d99..2d84303ea6bf 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = {
 	.dump_one	 = dccp_diag_dump_one,
 	.idiag_get_info	 = dccp_diag_get_info,
 	.idiag_type	 = IPPROTO_DCCP,
+	.idiag_info_size = sizeof(struct tcp_info),
 };
 
 static int __init dccp_diag_init(void)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 754484b3cd0e..675cf94e04f8 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -468,10 +468,10 @@ static struct proto dn_proto = {
 	.obj_size		= sizeof(struct dn_sock),
 };
 
-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp)
+static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
 {
 	struct dn_scp *scp;
-	struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto);
+	struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
 
 	if (!sk)
 		goto out;
@@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
 	}
 
 
-	if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL)
+	if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
 		return -ENOBUFS;
 
 	sk->sk_protocol = protocol;
@@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	cb = DN_SKB_CB(skb);
 	sk->sk_ack_backlog--;
-	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation);
+	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
 	if (newsk == NULL) {
 		release_sock(sk);
 		kfree_skb(skb);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 392e29a0227d..b445d492c115 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev)
 			continue;
 
 		cd->sw_addr = be32_to_cpup(sw_addr);
-		if (cd->sw_addr > PHY_MAX_ADDR)
+		if (cd->sw_addr >= PHY_MAX_ADDR)
 			continue;
 
 		if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
@@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev)
 				continue;
 
 			port_index = be32_to_cpup(port_reg);
+			if (port_index >= DSA_MAX_PORTS)
+				break;
 
 			port_name = of_get_property(port, "label", NULL);
 			if (!port_name)
@@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev)
 				goto out_free_chip;
 			}
 
-			if (port_index == DSA_MAX_PORTS)
-				break;
 		}
 	}
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 827cda560a55..0917123790ea 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -112,7 +112,7 @@ static int dsa_slave_open(struct net_device *dev)
 
 clear_promisc:
 	if (dev->flags & IFF_PROMISC)
-		dev_set_promiscuity(master, 0);
+		dev_set_promiscuity(master, -1);
 clear_allmulti:
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(master, -1);
@@ -345,6 +345,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
 	return ret;
 }
 
+static int dsa_slave_port_attr_set(struct net_device *dev,
+				   struct switchdev_attr *attr)
+{
+	int ret = 0;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_PORT_STP_STATE:
+		if (attr->trans == SWITCHDEV_TRANS_COMMIT)
+			ret = dsa_slave_stp_update(dev, attr->u.stp_state);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
 static int dsa_slave_bridge_port_join(struct net_device *dev,
 				      struct net_device *br)
 {
@@ -382,14 +400,20 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev)
 	return ret;
 }
 
-static int dsa_slave_parent_id_get(struct net_device *dev,
-				   struct netdev_phys_item_id *psid)
+static int dsa_slave_port_attr_get(struct net_device *dev,
+				   struct switchdev_attr *attr)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	psid->id_len = sizeof(ds->index);
-	memcpy(&psid->id, &ds->index, psid->id_len);
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_PORT_PARENT_ID:
+		attr->u.ppid.id_len = sizeof(ds->index);
+		memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
 	return 0;
 }
@@ -675,9 +699,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_get_iflink		= dsa_slave_get_iflink,
 };
 
-static const struct swdev_ops dsa_slave_swdev_ops = {
-	.swdev_parent_id_get = dsa_slave_parent_id_get,
-	.swdev_port_stp_update = dsa_slave_stp_update,
+static const struct switchdev_ops dsa_slave_switchdev_ops = {
+	.switchdev_port_attr_get	= dsa_slave_port_attr_get,
+	.switchdev_port_attr_set	= dsa_slave_port_attr_set,
 };
 
 static void dsa_slave_adjust_link(struct net_device *dev)
@@ -810,12 +834,19 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 	return 0;
 }
 
+static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
+static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
+					    struct netdev_queue *txq,
+					    void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &dsa_slave_netdev_xmit_lock_key);
+}
+
 int dsa_slave_suspend(struct net_device *slave_dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
 
-	netif_device_detach(slave_dev);
-
 	if (p->phy) {
 		phy_stop(p->phy);
 		p->old_pause = -1;
@@ -859,7 +890,10 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->tx_queue_len = 0;
 	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
-	slave_dev->swdev_ops = &dsa_slave_swdev_ops;
+	slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+
+	netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
+				 NULL);
 
 	SET_NETDEV_DEV(slave_dev, parent);
 	slave_dev->dev.of_node = ds->pd->port_dn[port];
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f3bad41d725f..77e0f0e7a88e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
 #include <net/ipv6.h>
 #include <net/ip.h>
 #include <net/dsa.h>
+#include <net/flow_dissector.h>
 #include <linux/uaccess.h>
 
 __setup("ether=", netdev_boot_setup);
@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
-	if (!__skb_flow_dissect(NULL, &keys, data,
-				eth->h_proto, sizeof(*eth), len))
-		return max_t(u32, keys.thoff, sizeof(*eth));
+	if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
+					    sizeof(*eth), len))
+		return max_t(u32, keys.control.thoff, sizeof(*eth));
 
 	/* parse for any L4 headers */
 	return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
@@ -156,10 +157,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
+
+	eth = (struct ethhdr *)skb->data;
 	skb_pull_inline(skb, ETH_HLEN);
-	eth = eth_hdr(skb);
 
-	if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
+	if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
 		if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
@@ -178,7 +180,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	if (unlikely(netdev_uses_dsa(dev)))
 		return htons(ETH_P_XDSA);
 
-	if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
+	if (likely(eth_proto_is_802_3(eth->h_proto)))
 		return eth->h_proto;
 
 	/*
@@ -468,6 +470,7 @@ EXPORT_SYMBOL(eth_gro_complete);
 
 static struct packet_offload eth_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_TEB),
+	.priority = 10,
 	.callbacks = {
 		.gro_receive = eth_gro_receive,
 		.gro_complete = eth_gro_complete,
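eth_proto_is_802_3() replaces the ntohs() comparison with one that needs no byte swap on little-endian CPUs: only the high byte of the big-endian protocol field matters for the >= ETH_P_802_3_MIN test. A hedged reconstruction of the helper (see include/linux/etherdevice.h for the authoritative version):

/* Hedged reconstruction of the helper this hunk starts using. */
static inline bool eth_proto_is_802_3(__be16 proto)
{
#ifdef __BIG_ENDIAN
	/* if CPU is big endian mask off bits representing LSB */
	proto &= htons(0xFF00);
#endif
	/* cast both to u16 and compare since LSB can be ignored */
	return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN);
}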
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 0ae5822ef944..f20a387a1011 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -55,27 +55,6 @@
 LIST_HEAD(lowpan_devices);
 static int lowpan_open_count;
 
-static __le16 lowpan_get_pan_id(const struct net_device *dev)
-{
-	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
-
-	return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev);
-}
-
-static __le16 lowpan_get_short_addr(const struct net_device *dev)
-{
-	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
-
-	return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
-}
-
-static u8 lowpan_get_dsn(const struct net_device *dev)
-{
-	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
-
-	return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev);
-}
-
 static struct header_ops lowpan_header_ops = {
 	.create	= lowpan_header_create,
 };
@@ -103,12 +82,6 @@ static const struct net_device_ops lowpan_netdev_ops = {
 	.ndo_start_xmit		= lowpan_xmit,
 };
 
-static struct ieee802154_mlme_ops lowpan_mlme = {
-	.get_pan_id = lowpan_get_pan_id,
-	.get_short_addr = lowpan_get_short_addr,
-	.get_dsn = lowpan_get_dsn,
-};
-
 static void lowpan_setup(struct net_device *dev)
 {
 	dev->addr_len		= IEEE802154_ADDR_LEN;
@@ -124,7 +97,6 @@ static void lowpan_setup(struct net_device *dev)
 
 	dev->netdev_ops		= &lowpan_netdev_ops;
 	dev->header_ops		= &lowpan_header_ops;
-	dev->ml_priv		= &lowpan_mlme;
 	dev->destructor		= free_netdev;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index 2349070bd534..2597abbf7f4b 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -190,6 +190,7 @@ err:
 
 static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
 {
+	struct wpan_dev *wpan_dev = lowpan_dev_info(dev)->real_dev->ieee802154_ptr;
 	struct ieee802154_addr sa, da;
 	struct ieee802154_mac_cb *cb = mac_cb_init(skb);
 	struct lowpan_addr_info info;
@@ -207,7 +208,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
 
 	/* prepare wpan address data */
 	sa.mode = IEEE802154_ADDR_LONG;
-	sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+	sa.pan_id = wpan_dev->pan_id;
 	sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
 
 	/* intra-PAN communications */
@@ -223,7 +224,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
 	} else {
 		da.mode = IEEE802154_ADDR_LONG;
 		da.extended_addr = ieee802154_devaddr_from_raw(daddr);
-		cb->ackreq = true;
+		cb->ackreq = wpan_dev->frame_retries >= 0;
 	}
 
 	return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 2ee00e8a0308..b0248e934230 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -121,8 +121,6 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size)
 	/* atomic_inc_return makes it start at 1, make it start at 0 */
 	rdev->wpan_phy_idx--;
 
-	mutex_init(&rdev->wpan_phy.pib_lock);
-
 	INIT_LIST_HEAD(&rdev->wpan_dev_list);
 	device_initialize(&rdev->wpan_phy.dev);
 	dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx);
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 2b4955d7aae5..3503c38954f9 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -97,8 +97,10 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
 	BUG_ON(!phy);
 	get_device(&phy->dev);

-	short_addr = ops->get_short_addr(dev);
-	pan_id = ops->get_pan_id(dev);
+	rtnl_lock();
+	short_addr = dev->ieee802154_ptr->short_addr;
+	pan_id = dev->ieee802154_ptr->pan_id;
+	rtnl_unlock();

 	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
 	    nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
@@ -117,12 +119,12 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
 	rtnl_unlock();

 	if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER,
-		       params.transmit_power) ||
+		       params.transmit_power / 100) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE,
 		       params.cca.mode) ||
 	    nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL,
-			params.cca_ed_level) ||
+			params.cca_ed_level / 100) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES,
 		       params.csma_retries) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE,
@@ -166,10 +168,7 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info)
 	if (!dev)
 		return NULL;

-	/* Check on mtu is currently a hacked solution because lowpan
-	 * and wpan have the same ARPHRD type.
-	 */
-	if (dev->type != ARPHRD_IEEE802154 || dev->mtu != IEEE802154_MTU) {
+	if (dev->type != ARPHRD_IEEE802154) {
 		dev_put(dev);
 		return NULL;
 	}
@@ -244,7 +243,9 @@ int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info)
 	addr.mode = IEEE802154_ADDR_LONG;
 	addr.extended_addr = nla_get_hwaddr(
 			info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]);
-	addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+	rtnl_lock();
+	addr.pan_id = dev->ieee802154_ptr->pan_id;
+	rtnl_unlock();

 	ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr,
 		nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
@@ -281,7 +282,9 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info)
 		addr.short_addr = nla_get_shortaddr(
 				info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]);
 	}
-	addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+	rtnl_lock();
+	addr.pan_id = dev->ieee802154_ptr->pan_id;
+	rtnl_unlock();

 	ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
 			nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
@@ -449,11 +452,7 @@ int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb)

 	idx = 0;
 	for_each_netdev(net, dev) {
-		/* Check on mtu is currently a hacked solution because lowpan
-		 * and wpan have the same ARPHRD type.
-		 */
-		if (idx < s_idx || dev->type != ARPHRD_IEEE802154 ||
-		    dev->mtu != IEEE802154_MTU)
+		if (idx < s_idx || dev->type != ARPHRD_IEEE802154)
 			goto cont;

 		if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid,
@@ -510,7 +509,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info)
 	ops->get_mac_params(dev, &params);

 	if (info->attrs[IEEE802154_ATTR_TXPOWER])
-		params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]);
+		params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]) * 100;

 	if (info->attrs[IEEE802154_ATTR_LBT_ENABLED])
 		params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]);
@@ -519,7 +518,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info)
 		params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]);

 	if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL])
-		params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]);
+		params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) * 100;

 	if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES])
 		params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]);
@@ -783,11 +782,7 @@ ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
 	int rc;

 	for_each_netdev(net, dev) {
-		/* Check on mtu is currently a hacked solution because lowpan
-		 * and wpan have the same ARPHRD type.
-		 */
-		if (idx < first_dev || dev->type != ARPHRD_IEEE802154 ||
-		    dev->mtu != IEEE802154_MTU)
+		if (idx < first_dev || dev->type != ARPHRD_IEEE802154)
 			goto skip;

 		data.ops = ieee802154_mlme_ops(dev);
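The "/ 100" and "* 100" in the nl-mac.c hunks exist because the MAC parameters now store transmit power and the CCA energy-detection level in hundredths of a dBm (mbm), while the legacy attributes keep whole dBm. A self-contained sketch of that round trip (helper names are illustrative):

/* The legacy attribute carries whole dBm, the phy stores 1/100 dBm
 * (mbm) -- hence "* 100" on set and "/ 100" on get above.
 */
#include <stdint.h>
#include <stdio.h>

static int32_t dbm_to_mbm(int8_t dbm)  { return (int32_t)dbm * 100; }
static int8_t  mbm_to_dbm(int32_t mbm) { return (int8_t)(mbm / 100); }

int main(void)
{
	int8_t user_dbm = -5;                 /* value from the s8 attribute */
	int32_t stored = dbm_to_mbm(user_dbm);

	printf("stored: %d mbm, reported: %d dBm\n",
	       stored, mbm_to_dbm(stored));   /* /100 truncates sub-dBm steps */
	return 0;
}
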
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 346c6665d25e..77d73014bde3 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -50,26 +50,26 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
 	if (!hdr)
 		goto out;

-	mutex_lock(&phy->pib_lock);
+	rtnl_lock();
 	if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel))
 		goto nla_put_failure;
 	for (i = 0; i < 32; i++) {
-		if (phy->channels_supported[i])
-			buf[pages++] = phy->channels_supported[i] | (i << 27);
+		if (phy->supported.channels[i])
+			buf[pages++] = phy->supported.channels[i] | (i << 27);
 	}
 	if (pages &&
 	    nla_put(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST,
 		    pages * sizeof(uint32_t), buf))
 		goto nla_put_failure;
-	mutex_unlock(&phy->pib_lock);
+	rtnl_unlock();
 	kfree(buf);
 	genlmsg_end(msg, hdr);
 	return 0;

nla_put_failure:
-	mutex_unlock(&phy->pib_lock);
+	rtnl_unlock();
 	genlmsg_cancel(msg, hdr);
out:
 	kfree(buf);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index f3c12f6a4a39..68f24016860c 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -207,10 +207,11 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
 	[NL802154_ATTR_PAGE] = { .type = NLA_U8, },
 	[NL802154_ATTR_CHANNEL] = { .type = NLA_U8, },

-	[NL802154_ATTR_TX_POWER] = { .type = NLA_S8, },
+	[NL802154_ATTR_TX_POWER] = { .type = NLA_S32, },

 	[NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, },
 	[NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, },
+	[NL802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, },

 	[NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, },

@@ -225,6 +226,10 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
 	[NL802154_ATTR_MAX_FRAME_RETRIES] = { .type = NLA_S8, },

 	[NL802154_ATTR_LBT_MODE] = { .type = NLA_U8, },
+
+	[NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED },
+
+	[NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED },
 };

 /* message building helper */
@@ -236,6 +241,28 @@ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
 }

 static int
+nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask)
+{
+	struct nlattr *nl_flags = nla_nest_start(msg, attr);
+	int i;
+
+	if (!nl_flags)
+		return -ENOBUFS;
+
+	i = 0;
+	while (mask) {
+		if ((mask & 1) && nla_put_flag(msg, i))
+			return -ENOBUFS;
+
+		mask >>= 1;
+		i++;
+	}
+
+	nla_nest_end(msg, nl_flags);
+	return 0;
+}
+
+static int
 nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev,
 				struct sk_buff *msg)
 {
@@ -248,7 +275,7 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev,

 	for (page = 0; page <= IEEE802154_MAX_PAGE; page++) {
 		if (nla_put_u32(msg, NL802154_ATTR_SUPPORTED_CHANNEL,
-				rdev->wpan_phy.channels_supported[page]))
+				rdev->wpan_phy.supported.channels[page]))
 			return -ENOBUFS;
 	}
 	nla_nest_end(msg, nl_page);
@@ -256,12 +283,100 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev,
 	return 0;
 }

+static int
+nl802154_put_capabilities(struct sk_buff *msg,
+			  struct cfg802154_registered_device *rdev)
+{
+	const struct wpan_phy_supported *caps = &rdev->wpan_phy.supported;
+	struct nlattr *nl_caps, *nl_channels;
+	int i;
+
+	nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS);
+	if (!nl_caps)
+		return -ENOBUFS;
+
+	nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS);
+	if (!nl_channels)
+		return -ENOBUFS;
+
+	for (i = 0; i <= IEEE802154_MAX_PAGE; i++) {
+		if (caps->channels[i]) {
+			if (nl802154_put_flags(msg, i, caps->channels[i]))
+				return -ENOBUFS;
+		}
+	}
+
+	nla_nest_end(msg, nl_channels);
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
+		struct nlattr *nl_ed_lvls;
+
+		nl_ed_lvls = nla_nest_start(msg,
+					    NL802154_CAP_ATTR_CCA_ED_LEVELS);
+		if (!nl_ed_lvls)
+			return -ENOBUFS;
+
+		for (i = 0; i < caps->cca_ed_levels_size; i++) {
+			if (nla_put_s32(msg, i, caps->cca_ed_levels[i]))
+				return -ENOBUFS;
+		}
+
+		nla_nest_end(msg, nl_ed_lvls);
+	}
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) {
+		struct nlattr *nl_tx_pwrs;
+
+		nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS);
+		if (!nl_tx_pwrs)
+			return -ENOBUFS;
+
+		for (i = 0; i < caps->tx_powers_size; i++) {
+			if (nla_put_s32(msg, i, caps->tx_powers[i]))
+				return -ENOBUFS;
+		}
+
+		nla_nest_end(msg, nl_tx_pwrs);
+	}
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) {
+		if (nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_MODES,
+				       caps->cca_modes) ||
+		    nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_OPTS,
+				       caps->cca_opts))
+			return -ENOBUFS;
+	}
+
+	if (nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MINBE, caps->min_minbe) ||
+	    nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MINBE, caps->max_minbe) ||
+	    nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MAXBE, caps->min_maxbe) ||
+	    nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MAXBE, caps->max_maxbe) ||
+	    nla_put_u8(msg, NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS,
+		       caps->min_csma_backoffs) ||
+	    nla_put_u8(msg, NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS,
+		       caps->max_csma_backoffs) ||
+	    nla_put_s8(msg, NL802154_CAP_ATTR_MIN_FRAME_RETRIES,
+		       caps->min_frame_retries) ||
+	    nla_put_s8(msg, NL802154_CAP_ATTR_MAX_FRAME_RETRIES,
+		       caps->max_frame_retries) ||
+	    nl802154_put_flags(msg, NL802154_CAP_ATTR_IFTYPES,
+			       caps->iftypes) ||
+	    nla_put_u32(msg, NL802154_CAP_ATTR_LBT, caps->lbt))
+		return -ENOBUFS;
+
+	nla_nest_end(msg, nl_caps);
+
+	return 0;
+}
+
 static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev,
 				  enum nl802154_commands cmd,
 				  struct sk_buff *msg, u32 portid, u32 seq,
 				  int flags)
 {
+	struct nlattr *nl_cmds;
 	void *hdr;
+	int i;

 	hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
 	if (!hdr)
@@ -286,25 +401,76 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev,
 			rdev->wpan_phy.current_channel))
 		goto nla_put_failure;

-	/* supported channels array */
+	/* TODO remove this behaviour; it is kept for a while so that
+	 * users can migrate to the new interface.
+	 */
 	if (nl802154_send_wpan_phy_channels(rdev, msg))
 		goto nla_put_failure;

 	/* cca mode */
-	if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE,
-			rdev->wpan_phy.cca.mode))
-		goto nla_put_failure;
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) {
+		if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE,
+				rdev->wpan_phy.cca.mode))
+			goto nla_put_failure;
+
+		if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) {
+			if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT,
+					rdev->wpan_phy.cca.opt))
+				goto nla_put_failure;
+		}
+	}
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) {
+		if (nla_put_s32(msg, NL802154_ATTR_TX_POWER,
+				rdev->wpan_phy.transmit_power))
+			goto nla_put_failure;
+	}

-	if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) {
-		if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT,
-				rdev->wpan_phy.cca.opt))
-			goto nla_put_failure;
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
+		if (nla_put_s32(msg, NL802154_ATTR_CCA_ED_LEVEL,
+				rdev->wpan_phy.cca_ed_level))
+			goto nla_put_failure;
 	}

-	if (nla_put_s8(msg, NL802154_ATTR_TX_POWER,
-		       rdev->wpan_phy.transmit_power))
+	if (nl802154_put_capabilities(msg, rdev))
+		goto nla_put_failure;
+
+	nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS);
+	if (!nl_cmds)
 		goto nla_put_failure;

+	i = 0;
+#define CMD(op, n)							\
+	do {								\
+		if (rdev->ops->op) {					\
+			i++;						\
+			if (nla_put_u32(msg, i, NL802154_CMD_ ## n))	\
+				goto nla_put_failure;			\
+		}							\
+	} while (0)
+
+	CMD(add_virtual_intf, NEW_INTERFACE);
+	CMD(del_virtual_intf, DEL_INTERFACE);
+	CMD(set_channel, SET_CHANNEL);
+	CMD(set_pan_id, SET_PAN_ID);
+	CMD(set_short_addr, SET_SHORT_ADDR);
+	CMD(set_backoff_exponent, SET_BACKOFF_EXPONENT);
+	CMD(set_max_csma_backoffs, SET_MAX_CSMA_BACKOFFS);
+	CMD(set_max_frame_retries, SET_MAX_FRAME_RETRIES);
+	CMD(set_lbt_mode, SET_LBT_MODE);
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER)
+		CMD(set_tx_power, SET_TX_POWER);
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL)
+		CMD(set_cca_ed_level, SET_CCA_ED_LEVEL);
+
+	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE)
+		CMD(set_cca_mode, SET_CCA_MODE);
+
+#undef CMD
+	nla_nest_end(msg, nl_cmds);
+
finish:
 	genlmsg_end(msg, hdr);
 	return 0;
@@ -575,7 +741,8 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info)

 	if (info->attrs[NL802154_ATTR_IFTYPE]) {
 		type = nla_get_u32(info->attrs[NL802154_ATTR_IFTYPE]);
-		if (type > NL802154_IFTYPE_MAX)
+		if (type > NL802154_IFTYPE_MAX ||
+		    !(rdev->wpan_phy.supported.iftypes & BIT(type)))
 			return -EINVAL;
 	}

@@ -625,7 +792,8 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info)
 	channel = nla_get_u8(info->attrs[NL802154_ATTR_CHANNEL]);

 	/* check 802.15.4 constraints */
-	if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL)
+	if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL ||
+	    !(rdev->wpan_phy.supported.channels[page] & BIT(channel)))
 		return -EINVAL;

 	return rdev_set_channel(rdev, page, channel);
@@ -636,12 +804,17 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info)
 	struct cfg802154_registered_device *rdev = info->user_ptr[0];
 	struct wpan_phy_cca cca;

+	if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE))
+		return -EOPNOTSUPP;
+
 	if (!info->attrs[NL802154_ATTR_CCA_MODE])
 		return -EINVAL;

 	cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]);
 	/* checking 802.15.4 constraints */
-	if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX)
+	if (cca.mode < NL802154_CCA_ENERGY ||
+	    cca.mode > NL802154_CCA_ATTR_MAX ||
+	    !(rdev->wpan_phy.supported.cca_modes & BIT(cca.mode)))
 		return -EINVAL;

 	if (cca.mode == NL802154_CCA_ENERGY_CARRIER) {
@@ -649,13 +822,58 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;

 		cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]);
-		if (cca.opt > NL802154_CCA_OPT_ATTR_MAX)
+		if (cca.opt > NL802154_CCA_OPT_ATTR_MAX ||
+		    !(rdev->wpan_phy.supported.cca_opts & BIT(cca.opt)))
 			return -EINVAL;
 	}

 	return rdev_set_cca_mode(rdev, &cca);
 }

+static int nl802154_set_cca_ed_level(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	s32 ed_level;
+	int i;
+
+	if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL802154_ATTR_CCA_ED_LEVEL])
+		return -EINVAL;
+
+	ed_level = nla_get_s32(info->attrs[NL802154_ATTR_CCA_ED_LEVEL]);
+
+	for (i = 0; i < rdev->wpan_phy.supported.cca_ed_levels_size; i++) {
+		if (ed_level == rdev->wpan_phy.supported.cca_ed_levels[i])
+			return rdev_set_cca_ed_level(rdev, ed_level);
+	}
+
+	return -EINVAL;
+}
+
+static int nl802154_set_tx_power(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	s32 power;
+	int i;
+
+	if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL802154_ATTR_TX_POWER])
+		return -EINVAL;
+
+	power = nla_get_s32(info->attrs[NL802154_ATTR_TX_POWER]);
+
+	for (i = 0; i < rdev->wpan_phy.supported.tx_powers_size; i++) {
+		if (power == rdev->wpan_phy.supported.tx_powers[i])
+			return rdev_set_tx_power(rdev, power);
+	}
+
+	return -EINVAL;
+}
+
 static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg802154_registered_device *rdev = info->user_ptr[0];
@@ -668,14 +886,22 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
 		return -EBUSY;

 	/* don't change address fields on monitor */
-	if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
-		return -EINVAL;
-
-	if (!info->attrs[NL802154_ATTR_PAN_ID])
+	if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
+	    !info->attrs[NL802154_ATTR_PAN_ID])
 		return -EINVAL;

 	pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]);

+	/* TODO
+	 * It is not obvious that the broadcast pan_id should be rejected
+	 * here. Broadcast is a valid setting; quoting 802.15.4:
+	 * "If this value is 0xffff, the device is not associated."
+	 *
+	 * Allowing it could be a simple way to disassociate a device.
+	 */
+	if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+		return -EINVAL;
+
 	return rdev_set_pan_id(rdev, wpan_dev, pan_id);
 }

@@ -691,14 +917,27 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info)
 		return -EBUSY;

 	/* don't change address fields on monitor */
-	if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
-		return -EINVAL;
-
-	if (!info->attrs[NL802154_ATTR_SHORT_ADDR])
+	if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
+	    !info->attrs[NL802154_ATTR_SHORT_ADDR])
 		return -EINVAL;

 	short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]);

+	/* TODO
+	 * It is not obvious that the broadcast short_addr should be
+	 * rejected here. Broadcast is a valid setting; quoting 802.15.4:
+	 * "A value of 0xfffe indicates that the device has associated
+	 * but has not been allocated an address. A value of 0xffff
+	 * indicates that the device does not have a short address."
+	 *
+	 * We should perhaps allow setting these values but refuse
+	 * socket communication while they are in effect.
+	 */
+	if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) ||
+	    short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST))
+		return -EINVAL;
+
 	return rdev_set_short_addr(rdev, wpan_dev, short_addr);
 }

@@ -722,7 +961,11 @@ nl802154_set_backoff_exponent(struct sk_buff *skb, struct genl_info *info)
 	max_be = nla_get_u8(info->attrs[NL802154_ATTR_MAX_BE]);

 	/* check 802.15.4 constraints */
-	if (max_be < 3 || max_be > 8 || min_be > max_be)
+	if (min_be < rdev->wpan_phy.supported.min_minbe ||
+	    min_be > rdev->wpan_phy.supported.max_minbe ||
+	    max_be < rdev->wpan_phy.supported.min_maxbe ||
+	    max_be > rdev->wpan_phy.supported.max_maxbe ||
+	    min_be > max_be)
 		return -EINVAL;

 	return rdev_set_backoff_exponent(rdev, wpan_dev, min_be, max_be);
@@ -747,7 +990,8 @@ nl802154_set_max_csma_backoffs(struct sk_buff *skb, struct genl_info *info)
 			info->attrs[NL802154_ATTR_MAX_CSMA_BACKOFFS]);

 	/* check 802.15.4 constraints */
-	if (max_csma_backoffs > 5)
+	if (max_csma_backoffs < rdev->wpan_phy.supported.min_csma_backoffs ||
+	    max_csma_backoffs > rdev->wpan_phy.supported.max_csma_backoffs)
 		return -EINVAL;

 	return rdev_set_max_csma_backoffs(rdev, wpan_dev, max_csma_backoffs);
@@ -771,7 +1015,8 @@ nl802154_set_max_frame_retries(struct sk_buff *skb, struct genl_info *info)
 			info->attrs[NL802154_ATTR_MAX_FRAME_RETRIES]);

 	/* check 802.15.4 constraints */
-	if (max_frame_retries < -1 || max_frame_retries > 7)
+	if (max_frame_retries < rdev->wpan_phy.supported.min_frame_retries ||
+	    max_frame_retries > rdev->wpan_phy.supported.max_frame_retries)
 		return -EINVAL;

 	return rdev_set_max_frame_retries(rdev, wpan_dev, max_frame_retries);
@@ -791,6 +1036,9 @@ static int nl802154_set_lbt_mode(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;

 	mode = !!nla_get_u8(info->attrs[NL802154_ATTR_LBT_MODE]);
+	if (!wpan_phy_supported_bool(mode, rdev->wpan_phy.supported.lbt))
+		return -EINVAL;
+
 	return rdev_set_lbt_mode(rdev, wpan_dev, mode);
 }

@@ -937,6 +1185,22 @@ static const struct genl_ops nl802154_ops[] = {
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
+		.cmd = NL802154_CMD_SET_CCA_ED_LEVEL,
+		.doit = nl802154_set_cca_ed_level,
+		.policy = nl802154_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
+				  NL802154_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL802154_CMD_SET_TX_POWER,
+		.doit = nl802154_set_tx_power,
+		.policy = nl802154_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
+				  NL802154_FLAG_NEED_RTNL,
+	},
+	{
 		.cmd = NL802154_CMD_SET_PAN_ID,
 		.doit = nl802154_set_pan_id,
 		.policy = nl802154_policy,
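Two idioms recur in the nl802154 hunks above: nl802154_put_flags() walks a capability mask and emits one flag attribute per set bit, and the setters validate requests against per-page channel bitmaps such as supported.channels[page] & BIT(channel). A userspace model of both checks (toy data, not the kernel API):

/* Model of the bit-walk and the channel-bitmap validation above. */
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1U << (n))
#define MAX_PAGE 31

static void dump_flags(uint32_t mask)
{
	for (int i = 0; mask; mask >>= 1, i++)
		if (mask & 1)
			printf("  flag %d set\n", i);  /* one entry per bit */
}

static int channel_supported(const uint32_t *channels,
			     unsigned page, unsigned channel)
{
	if (page > MAX_PAGE || channel > 26)
		return 0;
	return !!(channels[page] & BIT(channel));
}

int main(void)
{
	/* page 0 (2.4 GHz): channels 11..26 supported */
	uint32_t channels[MAX_PAGE + 1] = { [0] = 0x07fff800 };

	dump_flags(0x5);  /* bits 0 and 2 */
	printf("ch 15 on page 0: %d\n", channel_supported(channels, 0, 15));
	printf("ch  3 on page 0: %d\n", channel_supported(channels, 0, 3));
	return 0;
}
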
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 7b5a9dd94fe5..b2155a123f6c 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -75,6 +75,29 @@ rdev_set_cca_mode(struct cfg802154_registered_device *rdev,
 }

 static inline int
+rdev_set_cca_ed_level(struct cfg802154_registered_device *rdev, s32 ed_level)
+{
+	int ret;
+
+	trace_802154_rdev_set_cca_ed_level(&rdev->wpan_phy, ed_level);
+	ret = rdev->ops->set_cca_ed_level(&rdev->wpan_phy, ed_level);
+	trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+	return ret;
+}
+
+static inline int
+rdev_set_tx_power(struct cfg802154_registered_device *rdev,
+		  s32 power)
+{
+	int ret;
+
+	trace_802154_rdev_set_tx_power(&rdev->wpan_phy, power);
+	ret = rdev->ops->set_tx_power(&rdev->wpan_phy, power);
+	trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+	return ret;
+}
+
+static inline int
 rdev_set_pan_id(struct cfg802154_registered_device *rdev,
 		struct wpan_dev *wpan_dev, __le16 pan_id)
 {
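Each new rdev_* helper follows the same shape: trace the call, invoke the driver's cfg802154_ops hook, trace the integer result. A compact sketch of that wrapper pattern, with printf standing in for the tracepoints:

/* Sketch of the trace-wrapped op-call pattern used in rdev-ops.h;
 * printf stands in for trace_802154_rdev_* tracepoints.
 */
#include <stdio.h>

struct ops { int (*set_tx_power)(int mbm); };

static int driver_set_tx_power(int mbm) { return mbm < -32000 ? -22 : 0; }

static int rdev_set_tx_power(const struct ops *ops, int mbm)
{
	int ret;

	printf("trace: set_tx_power(%d mbm)\n", mbm);  /* entry tracepoint  */
	ret = ops->set_tx_power(mbm);
	printf("trace: return %d\n", ret);             /* return tracepoint */
	return ret;
}

int main(void)
{
	struct ops ops = { .set_tx_power = driver_set_tx_power };

	return rdev_set_tx_power(&ops, -500) ? 1 : 0;
}
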
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index b60c65f70346..b6eacf30ee7a 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -64,10 +64,8 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
 			if (tmp->type != ARPHRD_IEEE802154)
 				continue;

-			pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp);
-			short_addr =
-				ieee802154_mlme_ops(tmp)->get_short_addr(tmp);
-
+			pan_id = tmp->ieee802154_ptr->pan_id;
+			short_addr = tmp->ieee802154_ptr->short_addr;
 			if (pan_id == addr->pan_id &&
 			    short_addr == addr->short_addr) {
 				dev = tmp;
@@ -228,15 +226,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
 		goto out;
 	}

-	if (dev->type != ARPHRD_IEEE802154) {
-		err = -ENODEV;
-		goto out_put;
-	}
-
 	sk->sk_bound_dev_if = dev->ifindex;
 	sk_dst_reset(sk);

-out_put:
 	dev_put(dev);
out:
 	release_sock(sk);
@@ -286,7 +278,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)

 	if (size > mtu) {
 		pr_debug("size = %Zu, mtu = %u\n", size, mtu);
-		err = -EINVAL;
+		err = -EMSGSIZE;
 		goto out_dev;
 	}

@@ -739,6 +731,12 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	sock_recv_ts_and_drops(msg, sk, skb);

 	if (saddr) {
+		/* Clear the implicit padding in struct sockaddr_ieee802154
+		 * (16 bits between 'family' and 'addr') and in struct
+		 * ieee802154_addr_sa (16 bits at the end of the structure).
+		 */
+		memset(saddr, 0, sizeof(*saddr));
+
 		saddr->family = AF_IEEE802154;
 		ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source);
 		*addr_len = sizeof(*saddr);
@@ -797,9 +795,9 @@ static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
 	/* Data frame processing */
 	BUG_ON(dev->type != ARPHRD_IEEE802154);

-	pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
-	short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
-	hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr);
+	pan_id = dev->ieee802154_ptr->pan_id;
+	short_addr = dev->ieee802154_ptr->short_addr;
+	hw_addr = dev->ieee802154_ptr->extended_addr;

 	read_lock(&dgram_lock);
 	sk_for_each(sk, &dgram_head) {
@@ -1014,7 +1012,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
 	}

 	rc = -ENOMEM;
-	sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto);
+	sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern);
 	if (!sk)
 		goto out;
 	rc = 0;
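The memset(saddr, 0, sizeof(*saddr)) added to dgram_recvmsg() matters because compiler-inserted struct padding would otherwise carry stale kernel stack bytes out to userspace. A userspace model of the same hygiene (the layout is illustrative; actual padding depends on the ABI):

/* Model of the padding leak fixed above: a struct with implicit
 * padding is copied out wholesale, so every byte -- padding included --
 * must be made deterministic by clearing the whole struct first.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct addr_sa {
	uint8_t  family;    /* 1 byte ...                          */
	/* implicit padding typically lands here                   */
	uint16_t pan_id;    /* ... aligned to a 2-byte boundary    */
	uint16_t short_addr;
};

int main(void)
{
	struct addr_sa sa;

	memset(&sa, 0, sizeof(sa));   /* the fix: clears the padding too */
	sa.family = 1;
	sa.pan_id = 0x1234;
	sa.short_addr = 0x0001;

	for (size_t i = 0; i < sizeof(sa); i++)
		printf("%02x ", ((unsigned char *)&sa)[i]);
	printf("\n");
	return 0;
}
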
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 5ac25eb6ed17..9b5f0eb36696 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -1,4 +1,4 @@
-/* Based on net/wireless/tracing.h */
+/* Based on net/wireless/trace.h */

#undef TRACE_SYSTEM
#define TRACE_SYSTEM cfg802154
@@ -56,7 +56,7 @@ TRACE_EVENT(802154_rdev_add_virtual_intf,
 		__entry->type = type;
 		__entry->extended_addr = extended_addr;
 	),
-	TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, ea %llx",
+	TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, extended addr: 0x%llx",
 		  WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type,
 		  __le64_to_cpu(__entry->extended_addr))
 );
@@ -93,6 +93,21 @@ TRACE_EVENT(802154_rdev_set_channel,
 		  __entry->page, __entry->channel)
 );

+TRACE_EVENT(802154_rdev_set_tx_power,
+	TP_PROTO(struct wpan_phy *wpan_phy, s32 power),
+	TP_ARGS(wpan_phy, power),
+	TP_STRUCT__entry(
+		WPAN_PHY_ENTRY
+		__field(s32, power)
+	),
+	TP_fast_assign(
+		WPAN_PHY_ASSIGN;
+		__entry->power = power;
+	),
+	TP_printk(WPAN_PHY_PR_FMT ", mbm: %d", WPAN_PHY_PR_ARG,
+		  __entry->power)
+);
+
 TRACE_EVENT(802154_rdev_set_cca_mode,
 	TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca),
 	TP_ARGS(wpan_phy, cca),
@@ -108,6 +123,21 @@ TRACE_EVENT(802154_rdev_set_cca_mode,
 		  WPAN_CCA_PR_ARG)
 );

+TRACE_EVENT(802154_rdev_set_cca_ed_level,
+	TP_PROTO(struct wpan_phy *wpan_phy, s32 ed_level),
+	TP_ARGS(wpan_phy, ed_level),
+	TP_STRUCT__entry(
+		WPAN_PHY_ENTRY
+		__field(s32, ed_level)
+	),
+	TP_fast_assign(
+		WPAN_PHY_ASSIGN;
+		__entry->ed_level = ed_level;
+	),
+	TP_printk(WPAN_PHY_PR_FMT ", ed level: %d", WPAN_PHY_PR_ARG,
+		  __entry->ed_level)
+);
+
 DECLARE_EVENT_CLASS(802154_le16_template,
 	TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
 		 __le16 le16arg),
@@ -137,7 +167,7 @@ DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr,
 	TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
 		 __le16 le16arg),
 	TP_ARGS(wpan_phy, wpan_dev, le16arg),
-	TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", sa: 0x%04x",
+	TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", short addr: 0x%04x",
 		  WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG,
 		  __le16_to_cpu(__entry->le16arg))
 );
@@ -160,7 +190,7 @@ TRACE_EVENT(802154_rdev_set_backoff_exponent,
 	),

 	TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
-		  ", min be: %d, max_be: %d", WPAN_PHY_PR_ARG,
+		  ", min be: %d, max be: %d", WPAN_PHY_PR_ARG,
 		  WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be)
 );

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index bd2901604842..6fb3c90ad726 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -331,8 +331,8 @@ config NET_FOU_IP_TUNNELS
	  When this option is enabled IP tunnels can be configured to use
	  FOU or GUE encapsulation.

-config GENEVE
-	tristate "Generic Network Virtualization Encapsulation (Geneve)"
+config GENEVE_CORE
+	tristate "Generic Network Virtualization Encapsulation library"
	depends on INET
	select NET_UDP_TUNNEL
	---help---
@@ -615,6 +615,22 @@ config TCP_CONG_DCTCP
	  For further details see:
	  http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf

+config TCP_CONG_CDG
+	tristate "CAIA Delay-Gradient (CDG)"
+	default n
+	---help---
+	  CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies
+	  the TCP sender in order to:
+
+	  o Use the delay gradient as a congestion signal.
+	  o Back off with an average probability that is independent of the RTT.
+	  o Coexist with flows that use loss-based congestion control.
+	  o Tolerate packet loss unrelated to congestion.
+
+	  For further details see:
+	    D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
+	    delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
+
 choice
	prompt "Default TCP congestion control"
	default DEFAULT_CUBIC
@@ -646,6 +662,9 @@ choice
	config DEFAULT_DCTCP
		bool "DCTCP" if TCP_CONG_DCTCP=y

+	config DEFAULT_CDG
+		bool "CDG" if TCP_CONG_CDG=y
+
	config DEFAULT_RENO
		bool "Reno"
endchoice
@@ -668,6 +687,7 @@ config DEFAULT_TCP_CONG
	default "veno" if DEFAULT_VENO
	default "reno" if DEFAULT_RENO
	default "dctcp" if DEFAULT_DCTCP
+	default "cdg" if DEFAULT_CDG
	default "cubic"

config TCP_MD5SIG
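Once TCP_CONG_CDG is built in (or loaded as a module), a process can opt a socket into it through the standard TCP_CONGESTION socket option; nothing CDG-specific is required. A minimal, runnable example:

/* Select a congestion control algorithm per socket. This is generic
 * API, not specific to this patch; ENOENT means the algorithm is not
 * built or loaded, EPERM that it is not permitted for this user.
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	const char name[] = "cdg";

	if (fd < 0)
		return 1;
	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
		       name, sizeof(name)) < 0)
		perror("TCP_CONGESTION");
	else
		printf("congestion control set to %s\n", name);
	close(fd);
	return 0;
}
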
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 518c04ed666e..efc43f300b8c 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
 obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
 obj-$(CONFIG_TCP_CONG_DCTCP) += tcp_dctcp.o
 obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
@@ -56,7 +57,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
-obj-$(CONFIG_GENEVE) += geneve.o
+obj-$(CONFIG_GENEVE_CORE) += geneve_core.o

 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
		      xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b47a4d79d04..9532ee87151f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int backlog)
				err = 0;
			if (err)
				goto out;
+
+			tcp_fastopen_init_key_once(true);
		}
		err = inet_csk_listen_start(sk, backlog);
		if (err)
@@ -317,7 +319,7 @@ lookup_protocol:
	WARN_ON(!answer_prot->slab);

	err = -ENOBUFS;
-	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
+	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
	if (!sk)
		goto out;

@@ -488,7 +490,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
		inet->inet_saddr = 0;  /* Use device */

	/* Make sure we are allowed to bind here. */
-	if (sk->sk_prot->get_port(sk, snum)) {
+	if ((snum || !inet->bind_address_no_port) &&
+	    sk->sk_prot->get_port(sk, snum)) {
		inet->inet_saddr = inet->inet_rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
@@ -1430,7 +1433,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
			 struct net *net)
{
	struct socket *sock;
-	int rc = sock_create_kern(family, type, protocol, &sock);
+	int rc = sock_create_kern(net, family, type, protocol, &sock);

	if (rc == 0) {
		*sk = sock->sk;
@@ -1440,8 +1443,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
		 * we do not wish this socket to see incoming packets.
		 */
		(*sk)->sk_prot->unhash(*sk);
-
-		sk_change_net(*sk, net);
	}
	return rc;
}
@@ -1597,7 +1598,7 @@ static __net_init int inet_init_net(struct net *net)
	 */
	seqlock_init(&net->ipv4.ip_local_ports.lock);
	net->ipv4.ip_local_ports.range[0] =  32768;
-	net->ipv4.ip_local_ports.range[1] =  61000;
+	net->ipv4.ip_local_ports.range[1] =  60999;

	seqlock_init(&net->ipv4.ping_group_range.lock);
	/*
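The inet_bind() change defers port allocation when no port was requested and inet->bind_address_no_port is set. Userspace drives that flag with the IP_BIND_ADDRESS_NO_PORT socket option merged in the same cycle (the fallback #define below assumes newer UAPI headers): bind() then pins only the source address, and the ephemeral port is picked at connect() time, which avoids exhausting ports across many sockets bound to different source addresses.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IP_BIND_ADDRESS_NO_PORT
#define IP_BIND_ADDRESS_NO_PORT 24	/* from newer <linux/in.h> */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int one = 1;
	struct sockaddr_in src = { .sin_family = AF_INET, .sin_port = 0 };

	if (fd < 0)
		return 1;
	inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
	if (setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT,
		       &one, sizeof(one)) < 0)
		perror("setsockopt");	/* older kernel or headers */
	if (bind(fd, (struct sockaddr *)&src, sizeof(src)) < 0)
		perror("bind");		/* address pinned, no port reserved */
	close(fd);
	return 0;
}
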
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 419d23c53ec7..e813196c91c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type)
		size += nla_total_size(4);
	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+		size += nla_total_size(4);

	return size;
}
@@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;
@@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
+	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};

static int inet_netconf_get_devconf(struct sk_buff *in_skb,
@@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
+		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
+		    new_value != old_value) {
+			ifindex = devinet_conf_ifindex(net, cnf);
+			inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						    ifindex, cnf);
+		}
	}

	return ret;
@@ -2169,6 +2182,8 @@ static struct devinet_sysctl_table {
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
+		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
+					"ignore_routes_with_linkdown"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
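The devinet.c hunks register ignore_routes_with_linkdown as a per-interface sysctl, so it can be flipped through procfs as well as netlink. A minimal toggle ("eth0" is a placeholder interface name; needs CAP_NET_ADMIN and a kernel with this change):

/* Write 1 to the new per-interface knob via procfs. */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/proc/sys/net/ipv4/conf/eth0/ignore_routes_with_linkdown";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");	/* missing iface or older kernel */
		return 1;
	}
	fputs("1\n", f);		/* skip routes whose nexthop link is down */
	fclose(f);
	return 0;
}
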
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 30b544f025ac..477937465a20 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -49,7 +49,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
49 len = ALIGN(len, crypto_tfm_ctx_alignment()); 49 len = ALIGN(len, crypto_tfm_ctx_alignment());
50 } 50 }
51 51
52 len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); 52 len += sizeof(struct aead_request) + crypto_aead_reqsize(aead);
53 len = ALIGN(len, __alignof__(struct scatterlist)); 53 len = ALIGN(len, __alignof__(struct scatterlist));
54 54
55 len += sizeof(struct scatterlist) * nfrags; 55 len += sizeof(struct scatterlist) * nfrags;
@@ -68,17 +68,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
68 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; 68 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
69} 69}
70 70
71static inline struct aead_givcrypt_request *esp_tmp_givreq(
72 struct crypto_aead *aead, u8 *iv)
73{
74 struct aead_givcrypt_request *req;
75
76 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
77 crypto_tfm_ctx_alignment());
78 aead_givcrypt_set_tfm(req, aead);
79 return req;
80}
81
82static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) 71static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
83{ 72{
84 struct aead_request *req; 73 struct aead_request *req;
@@ -97,14 +86,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
97 __alignof__(struct scatterlist)); 86 __alignof__(struct scatterlist));
98} 87}
99 88
100static inline struct scatterlist *esp_givreq_sg(
101 struct crypto_aead *aead, struct aead_givcrypt_request *req)
102{
103 return (void *)ALIGN((unsigned long)(req + 1) +
104 crypto_aead_reqsize(aead),
105 __alignof__(struct scatterlist));
106}
107
108static void esp_output_done(struct crypto_async_request *base, int err) 89static void esp_output_done(struct crypto_async_request *base, int err)
109{ 90{
110 struct sk_buff *skb = base->data; 91 struct sk_buff *skb = base->data;
@@ -113,14 +94,37 @@ static void esp_output_done(struct crypto_async_request *base, int err)
113 xfrm_output_resume(skb, err); 94 xfrm_output_resume(skb, err);
114} 95}
115 96
97/* Move ESP header back into place. */
98static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
99{
100 struct ip_esp_hdr *esph = (void *)(skb->data + offset);
101 void *tmp = ESP_SKB_CB(skb)->tmp;
102 __be32 *seqhi = esp_tmp_seqhi(tmp);
103
104 esph->seq_no = esph->spi;
105 esph->spi = *seqhi;
106}
107
108static void esp_output_restore_header(struct sk_buff *skb)
109{
110 esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
111}
112
113static void esp_output_done_esn(struct crypto_async_request *base, int err)
114{
115 struct sk_buff *skb = base->data;
116
117 esp_output_restore_header(skb);
118 esp_output_done(base, err);
119}
120
116static int esp_output(struct xfrm_state *x, struct sk_buff *skb) 121static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
117{ 122{
118 int err; 123 int err;
119 struct ip_esp_hdr *esph; 124 struct ip_esp_hdr *esph;
120 struct crypto_aead *aead; 125 struct crypto_aead *aead;
121 struct aead_givcrypt_request *req; 126 struct aead_request *req;
122 struct scatterlist *sg; 127 struct scatterlist *sg;
123 struct scatterlist *asg;
124 struct sk_buff *trailer; 128 struct sk_buff *trailer;
125 void *tmp; 129 void *tmp;
126 u8 *iv; 130 u8 *iv;
@@ -129,17 +133,19 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
129 int clen; 133 int clen;
130 int alen; 134 int alen;
131 int plen; 135 int plen;
136 int ivlen;
132 int tfclen; 137 int tfclen;
133 int nfrags; 138 int nfrags;
134 int assoclen; 139 int assoclen;
135 int sglists;
136 int seqhilen; 140 int seqhilen;
137 __be32 *seqhi; 141 __be32 *seqhi;
142 __be64 seqno;
138 143
139 /* skb is pure payload to encrypt */ 144 /* skb is pure payload to encrypt */
140 145
141 aead = x->data; 146 aead = x->data;
142 alen = crypto_aead_authsize(aead); 147 alen = crypto_aead_authsize(aead);
148 ivlen = crypto_aead_ivsize(aead);
143 149
144 tfclen = 0; 150 tfclen = 0;
145 if (x->tfcpad) { 151 if (x->tfcpad) {
@@ -160,16 +166,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
160 nfrags = err; 166 nfrags = err;
161 167
162 assoclen = sizeof(*esph); 168 assoclen = sizeof(*esph);
163 sglists = 1;
164 seqhilen = 0; 169 seqhilen = 0;
165 170
166 if (x->props.flags & XFRM_STATE_ESN) { 171 if (x->props.flags & XFRM_STATE_ESN) {
167 sglists += 2;
168 seqhilen += sizeof(__be32); 172 seqhilen += sizeof(__be32);
169 assoclen += seqhilen; 173 assoclen += seqhilen;
170 } 174 }
171 175
172 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); 176 tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
173 if (!tmp) { 177 if (!tmp) {
174 err = -ENOMEM; 178 err = -ENOMEM;
175 goto error; 179 goto error;
@@ -177,9 +181,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
177 181
178 seqhi = esp_tmp_seqhi(tmp); 182 seqhi = esp_tmp_seqhi(tmp);
179 iv = esp_tmp_iv(aead, tmp, seqhilen); 183 iv = esp_tmp_iv(aead, tmp, seqhilen);
180 req = esp_tmp_givreq(aead, iv); 184 req = esp_tmp_req(aead, iv);
181 asg = esp_givreq_sg(aead, req); 185 sg = esp_req_sg(aead, req);
182 sg = asg + sglists;
183 186
184 /* Fill padding... */ 187 /* Fill padding... */
185 tail = skb_tail_pointer(trailer); 188 tail = skb_tail_pointer(trailer);
@@ -235,37 +238,53 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
235 *skb_mac_header(skb) = IPPROTO_UDP; 238 *skb_mac_header(skb) = IPPROTO_UDP;
236 } 239 }
237 240
238 esph->spi = x->id.spi;
239 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); 241 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
240 242
243 aead_request_set_callback(req, 0, esp_output_done, skb);
244
245 /* For ESN we move the header forward by 4 bytes to
246 * accomodate the high bits. We will move it back after
247 * encryption.
248 */
249 if ((x->props.flags & XFRM_STATE_ESN)) {
250 esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
251 *seqhi = esph->spi;
252 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
253 aead_request_set_callback(req, 0, esp_output_done_esn, skb);
254 }
255
256 esph->spi = x->id.spi;
257
241 sg_init_table(sg, nfrags); 258 sg_init_table(sg, nfrags);
242 skb_to_sgvec(skb, sg, 259 skb_to_sgvec(skb, sg,
243 esph->enc_data + crypto_aead_ivsize(aead) - skb->data, 260 (unsigned char *)esph - skb->data,
244 clen + alen); 261 assoclen + ivlen + clen + alen);
245 262
246 if ((x->props.flags & XFRM_STATE_ESN)) { 263 aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
247 sg_init_table(asg, 3); 264 aead_request_set_ad(req, assoclen);
248 sg_set_buf(asg, &esph->spi, sizeof(__be32)); 265
249 *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); 266 seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
250 sg_set_buf(asg + 1, seqhi, seqhilen); 267 ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
251 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); 268
252 } else 269 memset(iv, 0, ivlen);
253 sg_init_one(asg, esph, sizeof(*esph)); 270 memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
254 271 min(ivlen, 8));
255 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
256 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
257 aead_givcrypt_set_assoc(req, asg, assoclen);
258 aead_givcrypt_set_giv(req, esph->enc_data,
259 XFRM_SKB_CB(skb)->seq.output.low +
260 ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
261 272
262 ESP_SKB_CB(skb)->tmp = tmp; 273 ESP_SKB_CB(skb)->tmp = tmp;
263 err = crypto_aead_givencrypt(req); 274 err = crypto_aead_encrypt(req);
264 if (err == -EINPROGRESS) 275
276 switch (err) {
277 case -EINPROGRESS:
265 goto error; 278 goto error;
266 279
267 if (err == -EBUSY) 280 case -EBUSY:
268 err = NET_XMIT_DROP; 281 err = NET_XMIT_DROP;
282 break;
283
284 case 0:
285 if ((x->props.flags & XFRM_STATE_ESN))
286 esp_output_restore_header(skb);
287 }
269 288
270 kfree(tmp); 289 kfree(tmp);
271 290
@@ -364,6 +383,20 @@ static void esp_input_done(struct crypto_async_request *base, int err)
364 xfrm_input_resume(skb, esp_input_done2(skb, err)); 383 xfrm_input_resume(skb, esp_input_done2(skb, err));
365} 384}
366 385
386static void esp_input_restore_header(struct sk_buff *skb)
387{
388 esp_restore_header(skb, 0);
389 __skb_pull(skb, 4);
390}
391
392static void esp_input_done_esn(struct crypto_async_request *base, int err)
393{
394 struct sk_buff *skb = base->data;
395
396 esp_input_restore_header(skb);
397 esp_input_done(base, err);
398}
399
367/* 400/*
368 * Note: detecting truncated vs. non-truncated authentication data is very 401 * Note: detecting truncated vs. non-truncated authentication data is very
369 * expensive, so we only support truncated data, which is the recommended 402 * expensive, so we only support truncated data, which is the recommended
@@ -375,19 +408,18 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
375 struct crypto_aead *aead = x->data; 408 struct crypto_aead *aead = x->data;
376 struct aead_request *req; 409 struct aead_request *req;
377 struct sk_buff *trailer; 410 struct sk_buff *trailer;
378 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); 411 int ivlen = crypto_aead_ivsize(aead);
412 int elen = skb->len - sizeof(*esph) - ivlen;
379 int nfrags; 413 int nfrags;
380 int assoclen; 414 int assoclen;
381 int sglists;
382 int seqhilen; 415 int seqhilen;
383 __be32 *seqhi; 416 __be32 *seqhi;
384 void *tmp; 417 void *tmp;
385 u8 *iv; 418 u8 *iv;
386 struct scatterlist *sg; 419 struct scatterlist *sg;
387 struct scatterlist *asg;
388 int err = -EINVAL; 420 int err = -EINVAL;
389 421
390 if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) 422 if (!pskb_may_pull(skb, sizeof(*esph) + ivlen))
391 goto out; 423 goto out;
392 424
393 if (elen <= 0) 425 if (elen <= 0)
@@ -400,17 +432,15 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
400 nfrags = err; 432 nfrags = err;
401 433
402 assoclen = sizeof(*esph); 434 assoclen = sizeof(*esph);
403 sglists = 1;
404 seqhilen = 0; 435 seqhilen = 0;
405 436
406 if (x->props.flags & XFRM_STATE_ESN) { 437 if (x->props.flags & XFRM_STATE_ESN) {
407 sglists += 2;
408 seqhilen += sizeof(__be32); 438 seqhilen += sizeof(__be32);
409 assoclen += seqhilen; 439 assoclen += seqhilen;
410 } 440 }
411 441
412 err = -ENOMEM; 442 err = -ENOMEM;
413 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); 443 tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
414 if (!tmp) 444 if (!tmp)
415 goto out; 445 goto out;
416 446
@@ -418,36 +448,39 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
418 seqhi = esp_tmp_seqhi(tmp); 448 seqhi = esp_tmp_seqhi(tmp);
419 iv = esp_tmp_iv(aead, tmp, seqhilen); 449 iv = esp_tmp_iv(aead, tmp, seqhilen);
420 req = esp_tmp_req(aead, iv); 450 req = esp_tmp_req(aead, iv);
421 asg = esp_req_sg(aead, req); 451 sg = esp_req_sg(aead, req);
422 sg = asg + sglists;
423 452
424 skb->ip_summed = CHECKSUM_NONE; 453 skb->ip_summed = CHECKSUM_NONE;
425 454
426 esph = (struct ip_esp_hdr *)skb->data; 455 esph = (struct ip_esp_hdr *)skb->data;
427 456
428 /* Get ivec. This can be wrong, check against another impls. */ 457 aead_request_set_callback(req, 0, esp_input_done, skb);
429 iv = esph->enc_data;
430
431 sg_init_table(sg, nfrags);
432 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
433 458
459 /* For ESN we move the header forward by 4 bytes to
460 * accommodate the high bits. We will move it back after
461 * decryption.
462 */
434 if ((x->props.flags & XFRM_STATE_ESN)) { 463 if ((x->props.flags & XFRM_STATE_ESN)) {
435 sg_init_table(asg, 3); 464 esph = (void *)skb_push(skb, 4);
436 sg_set_buf(asg, &esph->spi, sizeof(__be32)); 465 *seqhi = esph->spi;
437 *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; 466 esph->spi = esph->seq_no;
438 sg_set_buf(asg + 1, seqhi, seqhilen); 467 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
439 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); 468 aead_request_set_callback(req, 0, esp_input_done_esn, skb);
440 } else 469 }
441 sg_init_one(asg, esph, sizeof(*esph));
442 470
443 aead_request_set_callback(req, 0, esp_input_done, skb); 471 sg_init_table(sg, nfrags);
444 aead_request_set_crypt(req, sg, sg, elen, iv); 472 skb_to_sgvec(skb, sg, 0, skb->len);
445 aead_request_set_assoc(req, asg, assoclen); 473
474 aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
475 aead_request_set_ad(req, assoclen);
446 476
447 err = crypto_aead_decrypt(req); 477 err = crypto_aead_decrypt(req);
448 if (err == -EINPROGRESS) 478 if (err == -EINPROGRESS)
449 goto out; 479 goto out;
450 480
481 if ((x->props.flags & XFRM_STATE_ESN))
482 esp_input_restore_header(skb);
483
451 err = esp_input_done2(skb, err); 484 err = esp_input_done2(skb, err);
452 485
453out: 486out:
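The 4-byte push above lines the ESN authenticated data up contiguously: after the shuffle, the first 12 bytes of the packet read [spi][seq_hi][seq_lo], which is exactly the associated data the new AEAD interface expects in front of the ciphertext. A standalone model of the byte juggling, using plain arrays in place of skbs and made-up values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct esp_hdr { uint32_t spi; uint32_t seq_no; };

int main(void)
{
    /* 4 spare bytes in front (tail of the IP header), then SPI and seq_lo */
    uint8_t pkt[12] = { 0xaa, 0xbb, 0xcc, 0xdd,   /* will be clobbered  */
                        0x00, 0x00, 0x00, 0x01,   /* SPI                */
                        0x00, 0x00, 0x00, 0x2a }; /* low sequence bits  */
    uint32_t seq_hi = 0x00000007;                 /* high 32 ESN bits   */
    uint32_t saved;
    struct esp_hdr *esph = (struct esp_hdr *)pkt; /* "skb_push(skb, 4)" */

    memcpy(&saved, &esph->spi, 4);     /* remember the clobbered word   */
    esph->spi = esph->seq_no;          /* SPI slides 4 bytes forward    */
    memcpy(&esph->seq_no, &seq_hi, 4); /* seq_hi lands between the two  */
    /* pkt now starts [spi][seq_hi][seq_lo]: contiguous ESN assoc data  */

    /* after decryption: esp_input_restore_header() in reverse          */
    esph->seq_no = esph->spi;
    memcpy(&esph->spi, &saved, 4);
    /* "__skb_pull(skb, 4)" would now drop the restored word again      */

    for (int i = 0; i < 12; i++)
        printf("%02x%s", pkt[i], i == 11 ? "\n" : " ");
    return 0;
}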
@@ -519,10 +552,16 @@ static void esp_destroy(struct xfrm_state *x)
519 552
520static int esp_init_aead(struct xfrm_state *x) 553static int esp_init_aead(struct xfrm_state *x)
521{ 554{
555 char aead_name[CRYPTO_MAX_ALG_NAME];
522 struct crypto_aead *aead; 556 struct crypto_aead *aead;
523 int err; 557 int err;
524 558
525 aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); 559 err = -ENAMETOOLONG;
560 if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
561 x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
562 goto error;
563
564 aead = crypto_alloc_aead(aead_name, 0, 0);
526 err = PTR_ERR(aead); 565 err = PTR_ERR(aead);
527 if (IS_ERR(aead)) 566 if (IS_ERR(aead))
528 goto error; 567 goto error;
@@ -561,15 +600,19 @@ static int esp_init_authenc(struct xfrm_state *x)
561 600
562 if ((x->props.flags & XFRM_STATE_ESN)) { 601 if ((x->props.flags & XFRM_STATE_ESN)) {
563 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, 602 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
564 "authencesn(%s,%s)", 603 "%s%sauthencesn(%s,%s)%s",
604 x->geniv ?: "", x->geniv ? "(" : "",
565 x->aalg ? x->aalg->alg_name : "digest_null", 605 x->aalg ? x->aalg->alg_name : "digest_null",
566 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) 606 x->ealg->alg_name,
607 x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
567 goto error; 608 goto error;
568 } else { 609 } else {
569 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, 610 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
570 "authenc(%s,%s)", 611 "%s%sauthenc(%s,%s)%s",
612 x->geniv ?: "", x->geniv ? "(" : "",
571 x->aalg ? x->aalg->alg_name : "digest_null", 613 x->aalg ? x->aalg->alg_name : "digest_null",
572 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) 614 x->ealg->alg_name,
615 x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
573 goto error; 616 goto error;
574 } 617 }
575 618
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e6e6eb..6bbc54940eb4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
280 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 280 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
281 fl4.flowi4_scope = scope; 281 fl4.flowi4_scope = scope;
282 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; 282 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
283 if (!fib_lookup(net, &fl4, &res)) 283 if (!fib_lookup(net, &fl4, &res, 0))
284 return FIB_RES_PREFSRC(net, res); 284 return FIB_RES_PREFSRC(net, res);
285 } else { 285 } else {
286 scope = RT_SCOPE_LINK; 286 scope = RT_SCOPE_LINK;
@@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
319 fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; 319 fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
320 320
321 net = dev_net(dev); 321 net = dev_net(dev);
322 if (fib_lookup(net, &fl4, &res)) 322 if (fib_lookup(net, &fl4, &res, 0))
323 goto last_resort; 323 goto last_resort;
324 if (res.type != RTN_UNICAST && 324 if (res.type != RTN_UNICAST &&
325 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) 325 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
@@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
354 fl4.flowi4_oif = dev->ifindex; 354 fl4.flowi4_oif = dev->ifindex;
355 355
356 ret = 0; 356 ret = 0;
357 if (fib_lookup(net, &fl4, &res) == 0) { 357 if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
358 if (res.type == RTN_UNICAST) 358 if (res.type == RTN_UNICAST)
359 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 359 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
360 } 360 }
@@ -1063,9 +1063,9 @@ static void nl_fib_lookup_exit(struct net *net)
1063 net->ipv4.fibnl = NULL; 1063 net->ipv4.fibnl = NULL;
1064} 1064}
1065 1065
1066static void fib_disable_ip(struct net_device *dev, int force) 1066static void fib_disable_ip(struct net_device *dev, unsigned long event)
1067{ 1067{
1068 if (fib_sync_down_dev(dev, force)) 1068 if (fib_sync_down_dev(dev, event))
1069 fib_flush(dev_net(dev)); 1069 fib_flush(dev_net(dev));
1070 rt_cache_flush(dev_net(dev)); 1070 rt_cache_flush(dev_net(dev));
1071 arp_ifdown(dev); 1071 arp_ifdown(dev);
@@ -1081,7 +1081,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
1081 case NETDEV_UP: 1081 case NETDEV_UP:
1082 fib_add_ifaddr(ifa); 1082 fib_add_ifaddr(ifa);
1083#ifdef CONFIG_IP_ROUTE_MULTIPATH 1083#ifdef CONFIG_IP_ROUTE_MULTIPATH
1084 fib_sync_up(dev); 1084 fib_sync_up(dev, RTNH_F_DEAD);
1085#endif 1085#endif
1086 atomic_inc(&net->ipv4.dev_addr_genid); 1086 atomic_inc(&net->ipv4.dev_addr_genid);
1087 rt_cache_flush(dev_net(dev)); 1087 rt_cache_flush(dev_net(dev));
@@ -1093,7 +1093,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
1093 /* Last address was deleted from this interface. 1093 /* Last address was deleted from this interface.
1094 * Disable IP. 1094 * Disable IP.
1095 */ 1095 */
1096 fib_disable_ip(dev, 1); 1096 fib_disable_ip(dev, event);
1097 } else { 1097 } else {
1098 rt_cache_flush(dev_net(dev)); 1098 rt_cache_flush(dev_net(dev));
1099 } 1099 }
@@ -1107,9 +1107,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1107 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1107 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1108 struct in_device *in_dev; 1108 struct in_device *in_dev;
1109 struct net *net = dev_net(dev); 1109 struct net *net = dev_net(dev);
1110 unsigned int flags;
1110 1111
1111 if (event == NETDEV_UNREGISTER) { 1112 if (event == NETDEV_UNREGISTER) {
1112 fib_disable_ip(dev, 2); 1113 fib_disable_ip(dev, event);
1113 rt_flush_dev(dev); 1114 rt_flush_dev(dev);
1114 return NOTIFY_DONE; 1115 return NOTIFY_DONE;
1115 } 1116 }
@@ -1124,16 +1125,22 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1124 fib_add_ifaddr(ifa); 1125 fib_add_ifaddr(ifa);
1125 } endfor_ifa(in_dev); 1126 } endfor_ifa(in_dev);
1126#ifdef CONFIG_IP_ROUTE_MULTIPATH 1127#ifdef CONFIG_IP_ROUTE_MULTIPATH
1127 fib_sync_up(dev); 1128 fib_sync_up(dev, RTNH_F_DEAD);
1128#endif 1129#endif
1129 atomic_inc(&net->ipv4.dev_addr_genid); 1130 atomic_inc(&net->ipv4.dev_addr_genid);
1130 rt_cache_flush(net); 1131 rt_cache_flush(net);
1131 break; 1132 break;
1132 case NETDEV_DOWN: 1133 case NETDEV_DOWN:
1133 fib_disable_ip(dev, 0); 1134 fib_disable_ip(dev, event);
1134 break; 1135 break;
1135 case NETDEV_CHANGEMTU:
1136 case NETDEV_CHANGE: 1136 case NETDEV_CHANGE:
1137 flags = dev_get_flags(dev);
1138 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1139 fib_sync_up(dev, RTNH_F_LINKDOWN);
1140 else
1141 fib_sync_down_dev(dev, event);
1142 /* fall through */
1143 case NETDEV_CHANGEMTU:
1137 rt_cache_flush(net); 1144 rt_cache_flush(net);
1138 break; 1145 break;
1139 } 1146 }
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 56151982f74e..18123d50f576 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -47,11 +47,12 @@ struct fib4_rule {
47#endif 47#endif
48}; 48};
49 49
50int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) 50int __fib_lookup(struct net *net, struct flowi4 *flp,
51 struct fib_result *res, unsigned int flags)
51{ 52{
52 struct fib_lookup_arg arg = { 53 struct fib_lookup_arg arg = {
53 .result = res, 54 .result = res,
54 .flags = FIB_LOOKUP_NOREF, 55 .flags = flags,
55 }; 56 };
56 int err; 57 int err;
57 58
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8d695b6659c7..c7358ea4ae93 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
266#ifdef CONFIG_IP_ROUTE_CLASSID 266#ifdef CONFIG_IP_ROUTE_CLASSID
267 nh->nh_tclassid != onh->nh_tclassid || 267 nh->nh_tclassid != onh->nh_tclassid ||
268#endif 268#endif
269 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) 269 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
270 return -1; 270 return -1;
271 onh++; 271 onh++;
272 } endfor_nexthops(fi); 272 } endfor_nexthops(fi);
@@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
318 nfi->fib_type == fi->fib_type && 318 nfi->fib_type == fi->fib_type &&
319 memcmp(nfi->fib_metrics, fi->fib_metrics, 319 memcmp(nfi->fib_metrics, fi->fib_metrics,
320 sizeof(u32) * RTAX_MAX) == 0 && 320 sizeof(u32) * RTAX_MAX) == 0 &&
321 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && 321 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
322 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 322 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
323 return fi; 323 return fi;
324 } 324 }
@@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
604 return -ENODEV; 604 return -ENODEV;
605 if (!(dev->flags & IFF_UP)) 605 if (!(dev->flags & IFF_UP))
606 return -ENETDOWN; 606 return -ENETDOWN;
607 if (!netif_carrier_ok(dev))
608 nh->nh_flags |= RTNH_F_LINKDOWN;
607 nh->nh_dev = dev; 609 nh->nh_dev = dev;
608 dev_hold(dev); 610 dev_hold(dev);
609 nh->nh_scope = RT_SCOPE_LINK; 611 nh->nh_scope = RT_SCOPE_LINK;
@@ -621,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
621 /* It is not necessary, but requires a bit of thinking */ 623 /* It is not necessary, but requires a bit of thinking */
622 if (fl4.flowi4_scope < RT_SCOPE_LINK) 624 if (fl4.flowi4_scope < RT_SCOPE_LINK)
623 fl4.flowi4_scope = RT_SCOPE_LINK; 625 fl4.flowi4_scope = RT_SCOPE_LINK;
624 err = fib_lookup(net, &fl4, &res); 626 err = fib_lookup(net, &fl4, &res,
627 FIB_LOOKUP_IGNORE_LINKSTATE);
625 if (err) { 628 if (err) {
626 rcu_read_unlock(); 629 rcu_read_unlock();
627 return err; 630 return err;
@@ -636,6 +639,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
636 if (!dev) 639 if (!dev)
637 goto out; 640 goto out;
638 dev_hold(dev); 641 dev_hold(dev);
642 if (!netif_carrier_ok(dev))
643 nh->nh_flags |= RTNH_F_LINKDOWN;
639 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; 644 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
640 } else { 645 } else {
641 struct in_device *in_dev; 646 struct in_device *in_dev;
@@ -654,6 +659,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
654 nh->nh_dev = in_dev->dev; 659 nh->nh_dev = in_dev->dev;
655 dev_hold(nh->nh_dev); 660 dev_hold(nh->nh_dev);
656 nh->nh_scope = RT_SCOPE_HOST; 661 nh->nh_scope = RT_SCOPE_HOST;
662 if (!netif_carrier_ok(nh->nh_dev))
663 nh->nh_flags |= RTNH_F_LINKDOWN;
657 err = 0; 664 err = 0;
658 } 665 }
659out: 666out:
@@ -713,8 +720,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
713 struct hlist_head *dest; 720 struct hlist_head *dest;
714 unsigned int new_hash; 721 unsigned int new_hash;
715 722
716 hlist_del(&fi->fib_hash);
717
718 new_hash = fib_info_hashfn(fi); 723 new_hash = fib_info_hashfn(fi);
719 dest = &new_info_hash[new_hash]; 724 dest = &new_info_hash[new_hash];
720 hlist_add_head(&fi->fib_hash, dest); 725 hlist_add_head(&fi->fib_hash, dest);
@@ -731,8 +736,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
731 struct hlist_head *ldest; 736 struct hlist_head *ldest;
732 unsigned int new_hash; 737 unsigned int new_hash;
733 738
734 hlist_del(&fi->fib_lhash);
735
736 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 739 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
737 ldest = &new_laddrhash[new_hash]; 740 ldest = &new_laddrhash[new_hash];
738 hlist_add_head(&fi->fib_lhash, ldest); 741 hlist_add_head(&fi->fib_lhash, ldest);
@@ -924,11 +927,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
924 if (!nh->nh_dev) 927 if (!nh->nh_dev)
925 goto failure; 928 goto failure;
926 } else { 929 } else {
930 int linkdown = 0;
931
927 change_nexthops(fi) { 932 change_nexthops(fi) {
928 err = fib_check_nh(cfg, fi, nexthop_nh); 933 err = fib_check_nh(cfg, fi, nexthop_nh);
929 if (err != 0) 934 if (err != 0)
930 goto failure; 935 goto failure;
936 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
937 linkdown++;
931 } endfor_nexthops(fi) 938 } endfor_nexthops(fi)
939 if (linkdown == fi->fib_nhs)
940 fi->fib_flags |= RTNH_F_LINKDOWN;
932 } 941 }
933 942
934 if (fi->fib_prefsrc) { 943 if (fi->fib_prefsrc) {
@@ -1027,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1027 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1036 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1028 goto nla_put_failure; 1037 goto nla_put_failure;
1029 if (fi->fib_nhs == 1) { 1038 if (fi->fib_nhs == 1) {
1039 struct in_device *in_dev;
1040
1030 if (fi->fib_nh->nh_gw && 1041 if (fi->fib_nh->nh_gw &&
1031 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) 1042 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
1032 goto nla_put_failure; 1043 goto nla_put_failure;
1033 if (fi->fib_nh->nh_oif && 1044 if (fi->fib_nh->nh_oif &&
1034 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) 1045 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
1035 goto nla_put_failure; 1046 goto nla_put_failure;
1047 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
1048 in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
1049 if (in_dev &&
1050 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1051 rtm->rtm_flags |= RTNH_F_DEAD;
1052 }
1036#ifdef CONFIG_IP_ROUTE_CLASSID 1053#ifdef CONFIG_IP_ROUTE_CLASSID
1037 if (fi->fib_nh[0].nh_tclassid && 1054 if (fi->fib_nh[0].nh_tclassid &&
1038 nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) 1055 nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1049,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1049 goto nla_put_failure; 1066 goto nla_put_failure;
1050 1067
1051 for_nexthops(fi) { 1068 for_nexthops(fi) {
1069 struct in_device *in_dev;
1070
1052 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1071 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1053 if (!rtnh) 1072 if (!rtnh)
1054 goto nla_put_failure; 1073 goto nla_put_failure;
1055 1074
1056 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1075 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1076 if (nh->nh_flags & RTNH_F_LINKDOWN) {
1077 in_dev = __in_dev_get_rtnl(nh->nh_dev);
1078 if (in_dev &&
1079 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1080 rtnh->rtnh_flags |= RTNH_F_DEAD;
1081 }
1057 rtnh->rtnh_hops = nh->nh_weight - 1; 1082 rtnh->rtnh_hops = nh->nh_weight - 1;
1058 rtnh->rtnh_ifindex = nh->nh_oif; 1083 rtnh->rtnh_ifindex = nh->nh_oif;
1059 1084
@@ -1107,7 +1132,7 @@ int fib_sync_down_addr(struct net *net, __be32 local)
1107 return ret; 1132 return ret;
1108} 1133}
1109 1134
1110int fib_sync_down_dev(struct net_device *dev, int force) 1135int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1111{ 1136{
1112 int ret = 0; 1137 int ret = 0;
1113 int scope = RT_SCOPE_NOWHERE; 1138 int scope = RT_SCOPE_NOWHERE;
@@ -1116,7 +1141,8 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1116 struct hlist_head *head = &fib_info_devhash[hash]; 1141 struct hlist_head *head = &fib_info_devhash[hash];
1117 struct fib_nh *nh; 1142 struct fib_nh *nh;
1118 1143
1119 if (force) 1144 if (event == NETDEV_UNREGISTER ||
1145 event == NETDEV_DOWN)
1120 scope = -1; 1146 scope = -1;
1121 1147
1122 hlist_for_each_entry(nh, head, nh_hash) { 1148 hlist_for_each_entry(nh, head, nh_hash) {
@@ -1133,7 +1159,15 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1133 dead++; 1159 dead++;
1134 else if (nexthop_nh->nh_dev == dev && 1160 else if (nexthop_nh->nh_dev == dev &&
1135 nexthop_nh->nh_scope != scope) { 1161 nexthop_nh->nh_scope != scope) {
1136 nexthop_nh->nh_flags |= RTNH_F_DEAD; 1162 switch (event) {
1163 case NETDEV_DOWN:
1164 case NETDEV_UNREGISTER:
1165 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1166 /* fall through */
1167 case NETDEV_CHANGE:
1168 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
1169 break;
1170 }
1137#ifdef CONFIG_IP_ROUTE_MULTIPATH 1171#ifdef CONFIG_IP_ROUTE_MULTIPATH
1138 spin_lock_bh(&fib_multipath_lock); 1172 spin_lock_bh(&fib_multipath_lock);
1139 fi->fib_power -= nexthop_nh->nh_power; 1173 fi->fib_power -= nexthop_nh->nh_power;
@@ -1143,14 +1177,23 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1143 dead++; 1177 dead++;
1144 } 1178 }
1145#ifdef CONFIG_IP_ROUTE_MULTIPATH 1179#ifdef CONFIG_IP_ROUTE_MULTIPATH
1146 if (force > 1 && nexthop_nh->nh_dev == dev) { 1180 if (event == NETDEV_UNREGISTER &&
1181 nexthop_nh->nh_dev == dev) {
1147 dead = fi->fib_nhs; 1182 dead = fi->fib_nhs;
1148 break; 1183 break;
1149 } 1184 }
1150#endif 1185#endif
1151 } endfor_nexthops(fi) 1186 } endfor_nexthops(fi)
1152 if (dead == fi->fib_nhs) { 1187 if (dead == fi->fib_nhs) {
1153 fi->fib_flags |= RTNH_F_DEAD; 1188 switch (event) {
1189 case NETDEV_DOWN:
1190 case NETDEV_UNREGISTER:
1191 fi->fib_flags |= RTNH_F_DEAD;
1192 /* fall through */
1193 case NETDEV_CHANGE:
1194 fi->fib_flags |= RTNH_F_LINKDOWN;
1195 break;
1196 }
1154 ret++; 1197 ret++;
1155 } 1198 }
1156 } 1199 }
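Both switches above encode the same policy: DOWN and UNREGISTER mark nexthops dead (and, via the fall-through, link-down as well), while a CHANGE event with no carrier only marks them link-down, leaving the route in the FIB. A compact model of that policy; the flag and event values mirror rtnetlink.h and netdevice.h but should be treated as assumptions:

#include <stdio.h>

#define RTNH_F_DEAD        1      /* assumption: linux/rtnetlink.h values */
#define RTNH_F_LINKDOWN   16
#define NETDEV_DOWN       0x0002  /* assumption: linux/netdevice.h values */
#define NETDEV_CHANGE     0x0004
#define NETDEV_UNREGISTER 0x0006

static unsigned int nh_flags_for(unsigned long event)
{
    unsigned int flags = 0;

    switch (event) {
    case NETDEV_DOWN:
    case NETDEV_UNREGISTER:
        flags |= RTNH_F_DEAD;
        /* fall through: a downed device has no usable link either */
    case NETDEV_CHANGE:
        flags |= RTNH_F_LINKDOWN;
        break;
    }
    return flags;
}

int main(void)
{
    printf("DOWN:   0x%x\n", nh_flags_for(NETDEV_DOWN));   /* 0x11 */
    printf("CHANGE: 0x%x\n", nh_flags_for(NETDEV_CHANGE)); /* 0x10 */
    return 0;
}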
@@ -1214,13 +1257,11 @@ out:
1214 return; 1257 return;
1215} 1258}
1216 1259
1217#ifdef CONFIG_IP_ROUTE_MULTIPATH
1218
1219/* 1260/*
1220 * Dead device goes up. We wake up dead nexthops. 1261 * Dead device goes up. We wake up dead nexthops.
1221 * It makes sense only on multipath routes. 1262 * It makes sense only on multipath routes.
1222 */ 1263 */
1223int fib_sync_up(struct net_device *dev) 1264int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1224{ 1265{
1225 struct fib_info *prev_fi; 1266 struct fib_info *prev_fi;
1226 unsigned int hash; 1267 unsigned int hash;
@@ -1247,7 +1288,7 @@ int fib_sync_up(struct net_device *dev)
1247 prev_fi = fi; 1288 prev_fi = fi;
1248 alive = 0; 1289 alive = 0;
1249 change_nexthops(fi) { 1290 change_nexthops(fi) {
1250 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { 1291 if (!(nexthop_nh->nh_flags & nh_flags)) {
1251 alive++; 1292 alive++;
1252 continue; 1293 continue;
1253 } 1294 }
@@ -1258,14 +1299,18 @@ int fib_sync_up(struct net_device *dev)
1258 !__in_dev_get_rtnl(dev)) 1299 !__in_dev_get_rtnl(dev))
1259 continue; 1300 continue;
1260 alive++; 1301 alive++;
1302#ifdef CONFIG_IP_ROUTE_MULTIPATH
1261 spin_lock_bh(&fib_multipath_lock); 1303 spin_lock_bh(&fib_multipath_lock);
1262 nexthop_nh->nh_power = 0; 1304 nexthop_nh->nh_power = 0;
1263 nexthop_nh->nh_flags &= ~RTNH_F_DEAD; 1305 nexthop_nh->nh_flags &= ~nh_flags;
1264 spin_unlock_bh(&fib_multipath_lock); 1306 spin_unlock_bh(&fib_multipath_lock);
1307#else
1308 nexthop_nh->nh_flags &= ~nh_flags;
1309#endif
1265 } endfor_nexthops(fi) 1310 } endfor_nexthops(fi)
1266 1311
1267 if (alive > 0) { 1312 if (alive > 0) {
1268 fi->fib_flags &= ~RTNH_F_DEAD; 1313 fi->fib_flags &= ~nh_flags;
1269 ret++; 1314 ret++;
1270 } 1315 }
1271 } 1316 }
@@ -1273,6 +1318,8 @@ int fib_sync_up(struct net_device *dev)
1273 return ret; 1318 return ret;
1274} 1319}
1275 1320
1321#ifdef CONFIG_IP_ROUTE_MULTIPATH
1322
1276/* 1323/*
1277 * The algorithm is suboptimal, but it provides really 1324 * The algorithm is suboptimal, but it provides really
1278 * fair weighted route distribution. 1325 * fair weighted route distribution.
@@ -1280,16 +1327,22 @@ int fib_sync_up(struct net_device *dev)
1280void fib_select_multipath(struct fib_result *res) 1327void fib_select_multipath(struct fib_result *res)
1281{ 1328{
1282 struct fib_info *fi = res->fi; 1329 struct fib_info *fi = res->fi;
1330 struct in_device *in_dev;
1283 int w; 1331 int w;
1284 1332
1285 spin_lock_bh(&fib_multipath_lock); 1333 spin_lock_bh(&fib_multipath_lock);
1286 if (fi->fib_power <= 0) { 1334 if (fi->fib_power <= 0) {
1287 int power = 0; 1335 int power = 0;
1288 change_nexthops(fi) { 1336 change_nexthops(fi) {
1289 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { 1337 in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
1290 power += nexthop_nh->nh_weight; 1338 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1291 nexthop_nh->nh_power = nexthop_nh->nh_weight; 1339 continue;
1292 } 1340 if (in_dev &&
1341 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1342 nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
1343 continue;
1344 power += nexthop_nh->nh_weight;
1345 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1293 } endfor_nexthops(fi); 1346 } endfor_nexthops(fi);
1294 fi->fib_power = power; 1347 fi->fib_power = power;
1295 if (power <= 0) { 1348 if (power <= 0) {
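The rewritten loop above drops a nexthop from the weight calculation in two cases: it is dead, or it is link-down on a device whose ignore_routes_with_linkdown setting is enabled. A small model of the eligibility test applied before summing weights; the structures are invented for the sketch:

#include <stdbool.h>
#include <stdio.h>

struct nh_model {
    bool dead, linkdown, ignore_linkdown; /* per-device sysctl */
    int weight;
};

static int total_power(const struct nh_model *nh, int n)
{
    int power = 0;

    for (int i = 0; i < n; i++) {
        if (nh[i].dead)
            continue;
        if (nh[i].ignore_linkdown && nh[i].linkdown)
            continue;
        power += nh[i].weight;  /* nh_power is reset to nh_weight */
    }
    return power;
}

int main(void)
{
    struct nh_model nhs[] = {
        { .weight = 2 },                                            /* usable        */
        { .linkdown = true, .ignore_linkdown = true, .weight = 3 }, /* skipped       */
        { .linkdown = true, .weight = 5 },                          /* still counted */
    };

    printf("power = %d\n", total_power(nhs, 3));  /* 7 */
    return 0;
}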
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 09b62e17dd8c..15d32612e3c6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -72,6 +72,7 @@
72#include <linux/list.h> 72#include <linux/list.h>
73#include <linux/slab.h> 73#include <linux/slab.h>
74#include <linux/export.h> 74#include <linux/export.h>
75#include <linux/vmalloc.h>
75#include <net/net_namespace.h> 76#include <net/net_namespace.h>
76#include <net/ip.h> 77#include <net/ip.h>
77#include <net/protocol.h> 78#include <net/protocol.h>
@@ -324,13 +325,15 @@ static inline void empty_child_dec(struct key_vector *n)
324 325
325static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) 326static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
326{ 327{
327 struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 328 struct key_vector *l;
328 struct key_vector *l = kv->kv; 329 struct tnode *kv;
329 330
331 kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
330 if (!kv) 332 if (!kv)
331 return NULL; 333 return NULL;
332 334
333 /* initialize key vector */ 335 /* initialize key vector */
336 l = kv->kv;
334 l->key = key; 337 l->key = key;
335 l->pos = 0; 338 l->pos = 0;
336 l->bits = 0; 339 l->bits = 0;
@@ -345,24 +348,26 @@ static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
345 348
346static struct key_vector *tnode_new(t_key key, int pos, int bits) 349static struct key_vector *tnode_new(t_key key, int pos, int bits)
347{ 350{
348 struct tnode *tnode = tnode_alloc(bits);
349 unsigned int shift = pos + bits; 351 unsigned int shift = pos + bits;
350 struct key_vector *tn = tnode->kv; 352 struct key_vector *tn;
353 struct tnode *tnode;
351 354
352 /* verify that bits and pos have their msb bits clear and are valid */ 355 /* verify that bits and pos have their msb bits clear and are valid */
353 BUG_ON(!bits || (shift > KEYLENGTH)); 356 BUG_ON(!bits || (shift > KEYLENGTH));
354 357
355 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), 358 tnode = tnode_alloc(bits);
356 sizeof(struct key_vector *) << bits);
357
358 if (!tnode) 359 if (!tnode)
359 return NULL; 360 return NULL;
360 361
362 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
363 sizeof(struct key_vector *) << bits);
364
361 if (bits == KEYLENGTH) 365 if (bits == KEYLENGTH)
362 tnode->full_children = 1; 366 tnode->full_children = 1;
363 else 367 else
364 tnode->empty_children = 1ul << bits; 368 tnode->empty_children = 1ul << bits;
365 369
370 tn = tnode->kv;
366 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 371 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
367 tn->pos = pos; 372 tn->pos = pos;
368 tn->bits = bits; 373 tn->bits = bits;
@@ -1077,6 +1082,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1077 struct trie *t = (struct trie *)tb->tb_data; 1082 struct trie *t = (struct trie *)tb->tb_data;
1078 struct fib_alias *fa, *new_fa; 1083 struct fib_alias *fa, *new_fa;
1079 struct key_vector *l, *tp; 1084 struct key_vector *l, *tp;
1085 unsigned int nlflags = 0;
1080 struct fib_info *fi; 1086 struct fib_info *fi;
1081 u8 plen = cfg->fc_dst_len; 1087 u8 plen = cfg->fc_dst_len;
1082 u8 slen = KEYLENGTH - plen; 1088 u8 slen = KEYLENGTH - plen;
@@ -1166,13 +1172,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1166 new_fa->fa_slen = fa->fa_slen; 1172 new_fa->fa_slen = fa->fa_slen;
1167 new_fa->tb_id = tb->tb_id; 1173 new_fa->tb_id = tb->tb_id;
1168 1174
1169 err = netdev_switch_fib_ipv4_add(key, plen, fi, 1175 err = switchdev_fib_ipv4_add(key, plen, fi,
1170 new_fa->fa_tos, 1176 new_fa->fa_tos,
1171 cfg->fc_type, 1177 cfg->fc_type,
1172 cfg->fc_nlflags, 1178 cfg->fc_nlflags,
1173 tb->tb_id); 1179 tb->tb_id);
1174 if (err) { 1180 if (err) {
1175 netdev_switch_fib_ipv4_abort(fi); 1181 switchdev_fib_ipv4_abort(fi);
1176 kmem_cache_free(fn_alias_kmem, new_fa); 1182 kmem_cache_free(fn_alias_kmem, new_fa);
1177 goto out; 1183 goto out;
1178 } 1184 }
@@ -1196,7 +1202,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1196 if (fa_match) 1202 if (fa_match)
1197 goto out; 1203 goto out;
1198 1204
1199 if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1205 if (cfg->fc_nlflags & NLM_F_APPEND)
1206 nlflags = NLM_F_APPEND;
1207 else
1200 fa = fa_first; 1208 fa = fa_first;
1201 } 1209 }
1202 err = -ENOENT; 1210 err = -ENOENT;
@@ -1216,12 +1224,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1216 new_fa->tb_id = tb->tb_id; 1224 new_fa->tb_id = tb->tb_id;
1217 1225
1218 /* (Optionally) offload fib entry to switch hardware. */ 1226 /* (Optionally) offload fib entry to switch hardware. */
1219 err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, 1227 err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
1220 cfg->fc_type, 1228 cfg->fc_nlflags, tb->tb_id);
1221 cfg->fc_nlflags,
1222 tb->tb_id);
1223 if (err) { 1229 if (err) {
1224 netdev_switch_fib_ipv4_abort(fi); 1230 switchdev_fib_ipv4_abort(fi);
1225 goto out_free_new_fa; 1231 goto out_free_new_fa;
1226 } 1232 }
1227 1233
@@ -1235,12 +1241,12 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1235 1241
1236 rt_cache_flush(cfg->fc_nlinfo.nl_net); 1242 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1237 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1243 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1238 &cfg->fc_nlinfo, 0); 1244 &cfg->fc_nlinfo, nlflags);
1239succeeded: 1245succeeded:
1240 return 0; 1246 return 0;
1241 1247
1242out_sw_fib_del: 1248out_sw_fib_del:
1243 netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); 1249 switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
1244out_free_new_fa: 1250out_free_new_fa:
1245 kmem_cache_free(fn_alias_kmem, new_fa); 1251 kmem_cache_free(fn_alias_kmem, new_fa);
1246out: 1252out:
@@ -1406,9 +1412,15 @@ found:
1406 continue; 1412 continue;
1407 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1413 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1408 const struct fib_nh *nh = &fi->fib_nh[nhsel]; 1414 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1415 struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
1409 1416
1410 if (nh->nh_flags & RTNH_F_DEAD) 1417 if (nh->nh_flags & RTNH_F_DEAD)
1411 continue; 1418 continue;
1419 if (in_dev &&
1420 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1421 nh->nh_flags & RTNH_F_LINKDOWN &&
1422 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1423 continue;
1412 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) 1424 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
1413 continue; 1425 continue;
1414 1426
@@ -1518,8 +1530,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1518 if (!fa_to_delete) 1530 if (!fa_to_delete)
1519 return -ESRCH; 1531 return -ESRCH;
1520 1532
1521 netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, 1533 switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
1522 cfg->fc_type, tb->tb_id); 1534 cfg->fc_type, tb->tb_id);
1523 1535
1524 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1536 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1525 &cfg->fc_nlinfo, 0); 1537 &cfg->fc_nlinfo, 0);
@@ -1768,10 +1780,9 @@ void fib_table_flush_external(struct fib_table *tb)
1768 if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) 1780 if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
1769 continue; 1781 continue;
1770 1782
1771 netdev_switch_fib_ipv4_del(n->key, 1783 switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
1772 KEYLENGTH - fa->fa_slen, 1784 fi, fa->fa_tos, fa->fa_type,
1773 fi, fa->fa_tos, 1785 tb->tb_id);
1774 fa->fa_type, tb->tb_id);
1775 } 1786 }
1776 1787
1777 /* update leaf slen */ 1788 /* update leaf slen */
@@ -1836,10 +1847,9 @@ int fib_table_flush(struct fib_table *tb)
1836 continue; 1847 continue;
1837 } 1848 }
1838 1849
1839 netdev_switch_fib_ipv4_del(n->key, 1850 switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
1840 KEYLENGTH - fa->fa_slen, 1851 fi, fa->fa_tos, fa->fa_type,
1841 fi, fa->fa_tos, 1852 tb->tb_id);
1842 fa->fa_type, tb->tb_id);
1843 hlist_del_rcu(&fa->fa_list); 1853 hlist_del_rcu(&fa->fa_list);
1844 fib_release_info(fa->fa_info); 1854 fib_release_info(fa->fa_info);
1845 alias_free_mem_rcu(fa); 1855 alias_free_mem_rcu(fa);
@@ -2057,11 +2067,12 @@ static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
2057static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, 2067static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2058 struct trie *t) 2068 struct trie *t)
2059{ 2069{
2060 struct key_vector *n, *pn = t->kv; 2070 struct key_vector *n, *pn;
2061 2071
2062 if (!t) 2072 if (!t)
2063 return NULL; 2073 return NULL;
2064 2074
2075 pn = t->kv;
2065 n = rcu_dereference(pn->tnode[0]); 2076 n = rcu_dereference(pn->tnode[0]);
2066 if (!n) 2077 if (!n)
2067 return NULL; 2078 return NULL;
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve_core.c
index 8986e63f3bda..311a4ba6950a 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve_core.c
@@ -60,11 +60,6 @@ struct geneve_net {
60 60
61static int geneve_net_id; 61static int geneve_net_id;
62 62
63static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
64{
65 return (struct genevehdr *)(udp_hdr(skb) + 1);
66}
67
68static struct geneve_sock *geneve_find_sock(struct net *net, 63static struct geneve_sock *geneve_find_sock(struct net *net,
69 sa_family_t family, __be16 port) 64 sa_family_t family, __be16 port)
70{ 65{
@@ -435,7 +430,7 @@ static int __init geneve_init_module(void)
435 if (rc) 430 if (rc)
436 return rc; 431 return rc;
437 432
438 pr_info("Geneve driver\n"); 433 pr_info("Geneve core logic\n");
439 434
440 return 0; 435 return 0;
441} 436}
@@ -449,5 +444,4 @@ module_exit(geneve_cleanup_module);
449 444
450MODULE_LICENSE("GPL"); 445MODULE_LICENSE("GPL");
451MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>"); 446MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
452MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic"); 447MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic");
453MODULE_ALIAS_RTNL_LINK("geneve");
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a3a697f5ffba..651cdf648ec4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1339,6 +1339,168 @@ out:
1339} 1339}
1340EXPORT_SYMBOL(ip_mc_inc_group); 1340EXPORT_SYMBOL(ip_mc_inc_group);
1341 1341
1342static int ip_mc_check_iphdr(struct sk_buff *skb)
1343{
1344 const struct iphdr *iph;
1345 unsigned int len;
1346 unsigned int offset = skb_network_offset(skb) + sizeof(*iph);
1347
1348 if (!pskb_may_pull(skb, offset))
1349 return -EINVAL;
1350
1351 iph = ip_hdr(skb);
1352
1353 if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph))
1354 return -EINVAL;
1355
1356 offset += ip_hdrlen(skb) - sizeof(*iph);
1357
1358 if (!pskb_may_pull(skb, offset))
1359 return -EINVAL;
1360
1361 iph = ip_hdr(skb);
1362
1363 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
1364 return -EINVAL;
1365
1366 len = skb_network_offset(skb) + ntohs(iph->tot_len);
1367 if (skb->len < len || len < offset)
1368 return -EINVAL;
1369
1370 skb_set_transport_header(skb, offset);
1371
1372 return 0;
1373}
1374
1375static int ip_mc_check_igmp_reportv3(struct sk_buff *skb)
1376{
1377 unsigned int len = skb_transport_offset(skb);
1378
1379 len += sizeof(struct igmpv3_report);
1380
1381 return pskb_may_pull(skb, len) ? 0 : -EINVAL;
1382}
1383
1384static int ip_mc_check_igmp_query(struct sk_buff *skb)
1385{
1386 unsigned int len = skb_transport_offset(skb);
1387
1388 len += sizeof(struct igmphdr);
1389 if (skb->len < len)
1390 return -EINVAL;
1391
1392 /* IGMPv{1,2}? */
1393 if (skb->len != len) {
1394 /* or IGMPv3? */
1395 len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr);
1396 if (skb->len < len || !pskb_may_pull(skb, len))
1397 return -EINVAL;
1398 }
1399
1400 /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the all-systems multicast
1401 * destination address (224.0.0.1) for general queries
1402 */
1403 if (!igmp_hdr(skb)->group &&
1404 ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP))
1405 return -EINVAL;
1406
1407 return 0;
1408}
1409
1410static int ip_mc_check_igmp_msg(struct sk_buff *skb)
1411{
1412 switch (igmp_hdr(skb)->type) {
1413 case IGMP_HOST_LEAVE_MESSAGE:
1414 case IGMP_HOST_MEMBERSHIP_REPORT:
1415 case IGMPV2_HOST_MEMBERSHIP_REPORT:
1416 /* fall through */
1417 return 0;
1418 case IGMPV3_HOST_MEMBERSHIP_REPORT:
1419 return ip_mc_check_igmp_reportv3(skb);
1420 case IGMP_HOST_MEMBERSHIP_QUERY:
1421 return ip_mc_check_igmp_query(skb);
1422 default:
1423 return -ENOMSG;
1424 }
1425}
1426
1427static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
1428{
1429 return skb_checksum_simple_validate(skb);
1430}
1431
1432static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
1433
1434{
1435 struct sk_buff *skb_chk;
1436 unsigned int transport_len;
1437 unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr);
1438 int ret;
1439
1440 transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
1441
1442 skb_get(skb);
1443 skb_chk = skb_checksum_trimmed(skb, transport_len,
1444 ip_mc_validate_checksum);
1445 if (!skb_chk)
1446 return -EINVAL;
1447
1448 if (!pskb_may_pull(skb_chk, len)) {
1449 kfree_skb(skb_chk);
1450 return -EINVAL;
1451 }
1452
1453 ret = ip_mc_check_igmp_msg(skb_chk);
1454 if (ret) {
1455 kfree_skb(skb_chk);
1456 return ret;
1457 }
1458
1459 if (skb_trimmed)
1460 *skb_trimmed = skb_chk;
1461 else
1462 kfree_skb(skb_chk);
1463
1464 return 0;
1465}
1466
1467/**
1468 * ip_mc_check_igmp - checks whether this is a sane IGMP packet
1469 * @skb: the skb to validate
1470 * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional)
1471 *
1472 * Checks whether an IPv4 packet is a valid IGMP packet. If so, it sets
1473 * skb network and transport headers accordingly and returns zero.
1474 *
1475 * -EINVAL: A broken packet was detected, i.e. it violates some internet
1476 * standard
1477 * -ENOMSG: IP header validation succeeded but it is not an IGMP packet.
1478 * -ENOMEM: A memory allocation failure happened.
1479 *
1480 * Optionally, an skb pointer might be provided via skb_trimmed (or set it
1481 * to NULL): After parsing an IGMP packet successfully it will point to
1482 * an skb which has its tail aligned to the IP packet end. This might
1483 * either be the originally provided skb or a trimmed, cloned version if
1484 * the skb frame had data beyond the IP packet. A cloned skb allows us
1485 * to leave the original skb and its full frame unchanged (which might be
1486 * desirable for layer 2 frame jugglers).
1487 *
1488 * The caller needs to release a reference count from any returned skb_trimmed.
1489 */
1490int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
1491{
1492 int ret = ip_mc_check_iphdr(skb);
1493
1494 if (ret < 0)
1495 return ret;
1496
1497 if (ip_hdr(skb)->protocol != IPPROTO_IGMP)
1498 return -ENOMSG;
1499
1500 return __ip_mc_check_igmp(skb, skb_trimmed);
1501}
1502EXPORT_SYMBOL(ip_mc_check_igmp);
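The helpers above reduce to a few pure header checks ahead of the checksum and per-type length validation. A user-space rendition of the ip_mc_check_iphdr() walk over a raw buffer, honoring the same -EINVAL/-ENOMSG contract; the struct is the standard IPv4 header layout, and the checksum step is omitted for brevity:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct ipv4_hdr {                /* minimal classic IPv4 header layout */
    uint8_t  ver_ihl;
    uint8_t  tos;
    uint16_t tot_len;
    uint16_t id;
    uint16_t frag_off;
    uint8_t  ttl;
    uint8_t  protocol;
    uint16_t check;
    uint32_t saddr, daddr;
};

static int check_igmp_carrier(const uint8_t *pkt, size_t len)
{
    const struct ipv4_hdr *iph = (const struct ipv4_hdr *)pkt;
    size_t ihl, tot;

    if (len < sizeof(*iph) || (iph->ver_ihl >> 4) != 4)
        return -EINVAL;
    ihl = (size_t)(iph->ver_ihl & 0x0f) * 4;
    if (ihl < sizeof(*iph) || len < ihl)
        return -EINVAL;           /* truncated or bogus header   */
    tot = ntohs(iph->tot_len);
    if (len < tot || tot < ihl)
        return -EINVAL;           /* frame shorter than tot_len  */
    if (iph->protocol != 2)       /* IPPROTO_IGMP                */
        return -ENOMSG;           /* valid IP, just not IGMP     */
    return 0;                     /* transport header sits at ihl */
}

int main(void)
{
    uint8_t pkt[28];
    struct ipv4_hdr iph = { .ver_ihl = 0x45, .protocol = 2,
                            .tot_len = htons(28) };

    memset(pkt, 0, sizeof(pkt));
    memcpy(pkt, &iph, sizeof(iph));
    printf("%d\n", check_igmp_carrier(pkt, sizeof(pkt)));  /* 0 */
    return 0;
}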
1503
1342/* 1504/*
1343 * Resend IGMP JOIN report; used by netdev notifier. 1505 * Resend IGMP JOIN report; used by netdev notifier.
1344 */ 1506 */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8976ca423a07..60021d0d9326 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
99 struct net *net = sock_net(sk); 99 struct net *net = sock_net(sk);
100 int smallest_size = -1, smallest_rover; 100 int smallest_size = -1, smallest_rover;
101 kuid_t uid = sock_i_uid(sk); 101 kuid_t uid = sock_i_uid(sk);
102 int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
102 103
103 local_bh_disable(); 104 local_bh_disable();
104 if (!snum) { 105 if (!snum) {
@@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
106 107
107again: 108again:
108 inet_get_local_port_range(net, &low, &high); 109 inet_get_local_port_range(net, &low, &high);
110 if (attempt_half) {
111 int half = low + ((high - low) >> 1);
112
113 if (attempt_half == 1)
114 high = half;
115 else
116 low = half;
117 }
109 remaining = (high - low) + 1; 118 remaining = (high - low) + 1;
110 smallest_rover = rover = prandom_u32() % remaining + low; 119 smallest_rover = rover = prandom_u32() % remaining + low;
111 120
@@ -127,11 +136,6 @@ again:
127 (tb->num_owners < smallest_size || smallest_size == -1)) { 136 (tb->num_owners < smallest_size || smallest_size == -1)) {
128 smallest_size = tb->num_owners; 137 smallest_size = tb->num_owners;
129 smallest_rover = rover; 138 smallest_rover = rover;
130 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
131 !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
132 snum = smallest_rover;
133 goto tb_found;
134 }
135 } 139 }
136 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 140 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
137 snum = rover; 141 snum = rover;
@@ -159,6 +163,11 @@ again:
159 snum = smallest_rover; 163 snum = smallest_rover;
160 goto have_snum; 164 goto have_snum;
161 } 165 }
166 if (attempt_half == 1) {
167 /* OK, we now try the upper half of the range */
168 attempt_half = 2;
169 goto again;
170 }
162 goto fail; 171 goto fail;
163 } 172 }
164 /* OK, here is the one we will use. HEAD is 173 /* OK, here is the one we will use. HEAD is
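The attempt_half logic above splits the ephemeral range in two for sockets that can reuse addresses: scan one half first, and only fall back to the other half once the first is exhausted. Sketched as a standalone search; the in_use callback and the port range are stand-ins, not the kernel's bind-hash lookup:

#include <stdbool.h>
#include <stdio.h>

static bool in_use(int port)   /* stand-in for the bind-hash lookup   */
{
    return port < 47000;       /* pretend the lower ports are all busy */
}

static int pick_port(int low, int high, bool can_reuse)
{
    int attempt_half = can_reuse ? 1 : 0;
    int lo, hi;

again:
    lo = low;
    hi = high;
    if (attempt_half) {
        int half = low + ((high - low) >> 1);

        if (attempt_half == 1)
            hi = half;         /* first pass: one half only   */
        else
            lo = half;         /* second pass: the other half */
    }
    for (int p = lo; p <= hi; p++)
        if (!in_use(p))
            return p;
    if (attempt_half == 1) {
        attempt_half = 2;      /* "OK, we now try the upper half" */
        goto again;
    }
    return -1;
}

int main(void)
{
    printf("%d\n", pick_port(32768, 60999, true));  /* 47000 */
    return 0;
}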
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4d32262c7502..c3b1f3a0f4cf 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -151,6 +151,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
151 if (nla_put_u8(skb, INET_DIAG_TCLASS, 151 if (nla_put_u8(skb, INET_DIAG_TCLASS,
152 inet6_sk(sk)->tclass) < 0) 152 inet6_sk(sk)->tclass) < 0)
153 goto errout; 153 goto errout;
154
155 if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
156 nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
157 goto errout;
154 } 158 }
155#endif 159#endif
156 160
@@ -200,9 +204,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
200 } 204 }
201#undef EXPIRES_IN_MS 205#undef EXPIRES_IN_MS
202 206
203 if (ext & (1 << (INET_DIAG_INFO - 1))) { 207 if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
204 attr = nla_reserve(skb, INET_DIAG_INFO, 208 attr = nla_reserve(skb, INET_DIAG_INFO,
205 sizeof(struct tcp_info)); 209 handler->idiag_info_size);
206 if (!attr) 210 if (!attr)
207 goto errout; 211 goto errout;
208 212
@@ -746,7 +750,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
746 750
747 entry.family = sk->sk_family; 751 entry.family = sk->sk_family;
748 752
749 spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 753 spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
750 754
751 lopt = icsk->icsk_accept_queue.listen_opt; 755 lopt = icsk->icsk_accept_queue.listen_opt;
752 if (!lopt || !listen_sock_qlen(lopt)) 756 if (!lopt || !listen_sock_qlen(lopt))
@@ -794,7 +798,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
794 } 798 }
795 799
796out: 800out:
797 spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 801 spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
798 802
799 return err; 803 return err;
800} 804}
@@ -1078,14 +1082,62 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
1078 return inet_diag_get_exact(skb, h, nlmsg_data(h)); 1082 return inet_diag_get_exact(skb, h, nlmsg_data(h));
1079} 1083}
1080 1084
1085static
1086int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
1087{
1088 const struct inet_diag_handler *handler;
1089 struct nlmsghdr *nlh;
1090 struct nlattr *attr;
1091 struct inet_diag_msg *r;
1092 void *info = NULL;
1093 int err = 0;
1094
1095 nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
1096 if (!nlh)
1097 return -ENOMEM;
1098
1099 r = nlmsg_data(nlh);
1100 memset(r, 0, sizeof(*r));
1101 inet_diag_msg_common_fill(r, sk);
1102 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
1103 r->id.idiag_sport = inet_sk(sk)->inet_sport;
1104 r->idiag_state = sk->sk_state;
1105
1106 if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
1107 nlmsg_cancel(skb, nlh);
1108 return err;
1109 }
1110
1111 handler = inet_diag_lock_handler(sk->sk_protocol);
1112 if (IS_ERR(handler)) {
1113 inet_diag_unlock_handler(handler);
1114 nlmsg_cancel(skb, nlh);
1115 return PTR_ERR(handler);
1116 }
1117
1118 attr = handler->idiag_info_size
1119 ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size)
1120 : NULL;
1121 if (attr)
1122 info = nla_data(attr);
1123
1124 handler->idiag_get_info(sk, r, info);
1125 inet_diag_unlock_handler(handler);
1126
1127 nlmsg_end(skb, nlh);
1128 return 0;
1129}
1130
1081static const struct sock_diag_handler inet_diag_handler = { 1131static const struct sock_diag_handler inet_diag_handler = {
1082 .family = AF_INET, 1132 .family = AF_INET,
1083 .dump = inet_diag_handler_dump, 1133 .dump = inet_diag_handler_dump,
1134 .get_info = inet_diag_handler_get_info,
1084}; 1135};
1085 1136
1086static const struct sock_diag_handler inet6_diag_handler = { 1137static const struct sock_diag_handler inet6_diag_handler = {
1087 .family = AF_INET6, 1138 .family = AF_INET6,
1088 .dump = inet_diag_handler_dump, 1139 .dump = inet_diag_handler_dump,
1140 .get_info = inet_diag_handler_get_info,
1089}; 1141};
1090 1142
1091int inet_diag_register(const struct inet_diag_handler *h) 1143int inet_diag_register(const struct inet_diag_handler *h)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c6fb80bd5826..5f9b063bbe8a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -18,6 +18,7 @@
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/wait.h> 20#include <linux/wait.h>
21#include <linux/vmalloc.h>
21 22
22#include <net/inet_connection_sock.h> 23#include <net/inet_connection_sock.h>
23#include <net/inet_hashtables.h> 24#include <net/inet_hashtables.h>
@@ -90,10 +91,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
90void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 91void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
91 const unsigned short snum) 92 const unsigned short snum)
92{ 93{
93 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
94
95 atomic_inc(&hashinfo->bsockets);
96
97 inet_sk(sk)->inet_num = snum; 94 inet_sk(sk)->inet_num = snum;
98 sk_add_bind_node(sk, &tb->owners); 95 sk_add_bind_node(sk, &tb->owners);
99 tb->num_owners++; 96 tb->num_owners++;
@@ -111,8 +108,6 @@ static void __inet_put_port(struct sock *sk)
111 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 108 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
112 struct inet_bind_bucket *tb; 109 struct inet_bind_bucket *tb;
113 110
114 atomic_dec(&hashinfo->bsockets);
115
116 spin_lock(&head->lock); 111 spin_lock(&head->lock);
117 tb = inet_csk(sk)->icsk_bind_hash; 112 tb = inet_csk(sk)->icsk_bind_hash;
118 __sk_del_bind_node(sk); 113 __sk_del_bind_node(sk);
@@ -399,9 +394,10 @@ not_unique:
399 return -EADDRNOTAVAIL; 394 return -EADDRNOTAVAIL;
400} 395}
401 396
402static inline u32 inet_sk_port_offset(const struct sock *sk) 397static u32 inet_sk_port_offset(const struct sock *sk)
403{ 398{
404 const struct inet_sock *inet = inet_sk(sk); 399 const struct inet_sock *inet = inet_sk(sk);
400
405 return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, 401 return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
406 inet->inet_daddr, 402 inet->inet_daddr,
407 inet->inet_dport); 403 inet->inet_dport);
@@ -507,8 +503,14 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
507 inet_get_local_port_range(net, &low, &high); 503 inet_get_local_port_range(net, &low, &high);
508 remaining = (high - low) + 1; 504 remaining = (high - low) + 1;
509 505
506 /* By starting with offset being an even number,
507 * we tend to leave about 50% of ports for other uses,
508 * like bind(0).
509 */
510 offset &= ~1;
511
510 local_bh_disable(); 512 local_bh_disable();
511 for (i = 1; i <= remaining; i++) { 513 for (i = 0; i < remaining; i++) {
512 port = low + (i + offset) % remaining; 514 port = low + (i + offset) % remaining;
513 if (inet_is_local_reserved_port(net, port)) 515 if (inet_is_local_reserved_port(net, port))
514 continue; 516 continue;
@@ -552,7 +554,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
552 return -EADDRNOTAVAIL; 554 return -EADDRNOTAVAIL;
553 555
554ok: 556ok:
555 hint += i; 557 hint += (i + 2) & ~1;
556 558
557 /* Head lock still held and bh's disabled */ 559 /* Head lock still held and bh's disabled */
558 inet_bind_hash(sk, tb, port); 560 inet_bind_hash(sk, tb, port);
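Two small arithmetic tricks above keep connect()'s ephemeral search on even port numbers, so roughly half the range stays available to explicit bind(0): the initial offset is rounded down to even, and the hint advances by (i + 2) & ~1, the smallest even step past the slot just used. Worked out numerically:

#include <stdio.h>

int main(void)
{
    unsigned int offset = 54321;

    offset &= ~1u;                  /* start the scan on an even slot */
    printf("offset: %u\n", offset); /* 54320 */

    /* advance after a success at loop index i: always an even step */
    for (unsigned int i = 0; i < 5; i++)
        printf("i=%u -> hint += %u\n", i, (i + 2) & ~1u);
    /* i=0,1 -> 2;  i=2,3 -> 4;  i=4 -> 6 */
    return 0;
}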
@@ -599,7 +601,11 @@ out:
599int inet_hash_connect(struct inet_timewait_death_row *death_row, 601int inet_hash_connect(struct inet_timewait_death_row *death_row,
600 struct sock *sk) 602 struct sock *sk)
601{ 603{
602 return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), 604 u32 port_offset = 0;
605
606 if (!inet_sk(sk)->inet_num)
607 port_offset = inet_sk_port_offset(sk);
608 return __inet_hash_connect(death_row, sk, port_offset,
603 __inet_check_established); 609 __inet_check_established);
604} 610}
605EXPORT_SYMBOL_GPL(inet_hash_connect); 611EXPORT_SYMBOL_GPL(inet_hash_connect);
@@ -608,7 +614,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
608{ 614{
609 int i; 615 int i;
610 616
611 atomic_set(&h->bsockets, 0);
612 for (i = 0; i < INET_LHTABLE_SIZE; i++) { 617 for (i = 0; i < INET_LHTABLE_SIZE; i++) {
613 spin_lock_init(&h->listening_hash[i].lock); 618 spin_lock_init(&h->listening_hash[i].lock);
614 INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, 619 INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
@@ -616,3 +621,33 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
616 } 621 }
617} 622}
618EXPORT_SYMBOL_GPL(inet_hashinfo_init); 623EXPORT_SYMBOL_GPL(inet_hashinfo_init);
624
625int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
626{
627 unsigned int i, nblocks = 1;
628
629 if (sizeof(spinlock_t) != 0) {
630 /* allocate 2 cache lines or at least one spinlock per cpu */
631 nblocks = max_t(unsigned int,
632 2 * L1_CACHE_BYTES / sizeof(spinlock_t),
633 1);
634 nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
635
636 /* no more locks than number of hash buckets */
637 nblocks = min(nblocks, hashinfo->ehash_mask + 1);
638
639 hashinfo->ehash_locks = kmalloc_array(nblocks, sizeof(spinlock_t),
640 GFP_KERNEL | __GFP_NOWARN);
641 if (!hashinfo->ehash_locks)
642 hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t));
643
644 if (!hashinfo->ehash_locks)
645 return -ENOMEM;
646
647 for (i = 0; i < nblocks; i++)
648 spin_lock_init(&hashinfo->ehash_locks[i]);
649 }
650 hashinfo->ehash_locks_mask = nblocks - 1;
651 return 0;
652}
653EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
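The sizing rule in inet_ehash_locks_alloc() is easy to check by hand: take two cache lines' worth of spinlocks (at least one), scale by the possible-CPU count, round up to a power of two so a simple mask can index the array, then cap at the bucket count. A sketch with example sizes; the 64-byte cache line and 4-byte lock are assumptions:

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int x)
{
    unsigned int r = 1;

    while (r < x)
        r <<= 1;
    return r;
}

int main(void)
{
    unsigned int l1_cache = 64, lock_size = 4;  /* assumed sizes */
    unsigned int cpus = 8, buckets = 65536;
    unsigned int nblocks;

    nblocks = 2 * l1_cache / lock_size;         /* 2 cache lines of locks */
    if (nblocks < 1)
        nblocks = 1;
    nblocks = roundup_pow_of_two(nblocks * cpus);
    if (nblocks > buckets)                      /* no more locks than buckets */
        nblocks = buckets;

    printf("%u locks, mask 0x%x\n", nblocks, nblocks - 1); /* 256, 0xff */
    return 0;
}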
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 00ec8d5d7e7e..2ffbd16b79e0 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -170,7 +170,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
170} 170}
171EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 171EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
172 172
173void tw_timer_handler(unsigned long data) 173static void tw_timer_handler(unsigned long data)
174{ 174{
175 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; 175 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
176 176
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3674484946a5..2d3aa408fbdc 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,17 +39,21 @@
39#include <net/route.h> 39#include <net/route.h>
40#include <net/xfrm.h> 40#include <net/xfrm.h>
41 41
42static bool ip_may_fragment(const struct sk_buff *skb)
43{
44 return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
45 skb->ignore_df;
46}
47
48static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) 42static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
49{ 43{
50 if (skb->len <= mtu) 44 if (skb->len <= mtu)
51 return false; 45 return false;
52 46
47 if (unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0))
48 return false;
49
50 /* original fragment exceeds mtu and DF is set */
51 if (unlikely(IPCB(skb)->frag_max_size > mtu))
52 return true;
53
54 if (skb->ignore_df)
55 return false;
56
53 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 57 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
54 return false; 58 return false;
55 59
@@ -114,7 +118,7 @@ int ip_forward(struct sk_buff *skb)
114 118
115 IPCB(skb)->flags |= IPSKB_FORWARDED; 119 IPCB(skb)->flags |= IPSKB_FORWARDED;
116 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); 120 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
117 if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) { 121 if (ip_exceeds_mtu(skb, mtu)) {
118 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); 122 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
119 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 123 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
120 htonl(mtu)); 124 htonl(mtu));
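The reordered checks fold the old ip_may_fragment() into ip_exceeds_mtu(): a DF packet whose original fragments were already larger than the egress MTU must trigger FRAG_NEEDED even when ignore_df is set, because refragmenting it would misrepresent what the sender actually emitted. The decision tree, modeled on plain values; the struct and the elided GSO branch are simplifications:

#include <stdbool.h>
#include <stdio.h>

struct pkt {
    unsigned int len;
    unsigned int frag_max_size;  /* largest original fragment, 0 if none */
    bool df, ignore_df;
};

static bool exceeds_mtu(const struct pkt *p, unsigned int mtu)
{
    if (p->len <= mtu)
        return false;
    if (!p->df)
        return false;            /* fragmentable on the way out        */
    if (p->frag_max_size > mtu)
        return true;             /* sender's fragments already too big */
    if (p->ignore_df)
        return false;
    return true;                 /* GSO seglen test elided here        */
}

int main(void)
{
    struct pkt p = { .len = 2000, .frag_max_size = 1500, .df = true,
                     .ignore_df = true };

    /* despite ignore_df, the 1500-byte DF fragment beats a 1400 MTU */
    printf("%d\n", exceeds_mtu(&p, 1400));  /* 1 -> send FRAG_NEEDED */
    return 0;
}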
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cc1da6d9cb35..a50dc6d408d1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -75,6 +75,7 @@ struct ipq {
75 __be16 id; 75 __be16 id;
76 u8 protocol; 76 u8 protocol;
77 u8 ecn; /* RFC3168 support */ 77 u8 ecn; /* RFC3168 support */
78 u16 max_df_size; /* largest frag with DF set seen */
78 int iif; 79 int iif;
79 unsigned int rid; 80 unsigned int rid;
80 struct inet_peer *peer; 81 struct inet_peer *peer;
@@ -173,6 +174,15 @@ static void ipq_kill(struct ipq *ipq)
173 inet_frag_kill(&ipq->q, &ip4_frags); 174 inet_frag_kill(&ipq->q, &ip4_frags);
174} 175}
175 176
177static bool frag_expire_skip_icmp(u32 user)
178{
179 return user == IP_DEFRAG_AF_PACKET ||
180 ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
181 __IP_DEFRAG_CONNTRACK_IN_END) ||
182 ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
183 __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
184}
185
176/* 186/*
177 * Oops, a fragment queue timed out. Kill it and send an ICMP reply. 187 * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
178 */ 188 */
@@ -217,10 +227,8 @@ static void ip_expire(unsigned long arg)
217 /* Only an end host needs to send an ICMP 227 /* Only an end host needs to send an ICMP
218 * "Fragment Reassembly Timeout" message, per RFC792. 228 * "Fragment Reassembly Timeout" message, per RFC792.
219 */ 229 */
220 if (qp->user == IP_DEFRAG_AF_PACKET || 230 if (frag_expire_skip_icmp(qp->user) &&
221 ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && 231 (skb_rtable(head)->rt_type != RTN_LOCAL))
222 (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
223 (skb_rtable(head)->rt_type != RTN_LOCAL)))
224 goto out_rcu_unlock; 232 goto out_rcu_unlock;
225 233
226 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 234 /* Send an ICMP "Fragment Reassembly Timeout" message. */
@@ -319,6 +327,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
319{ 327{
320 struct sk_buff *prev, *next; 328 struct sk_buff *prev, *next;
321 struct net_device *dev; 329 struct net_device *dev;
330 unsigned int fragsize;
322 int flags, offset; 331 int flags, offset;
323 int ihl, end; 332 int ihl, end;
324 int err = -ENOENT; 333 int err = -ENOENT;
@@ -474,9 +483,14 @@ found:
474 if (offset == 0) 483 if (offset == 0)
475 qp->q.flags |= INET_FRAG_FIRST_IN; 484 qp->q.flags |= INET_FRAG_FIRST_IN;
476 485
486 fragsize = skb->len + ihl;
487
488 if (fragsize > qp->q.max_size)
489 qp->q.max_size = fragsize;
490
477 if (ip_hdr(skb)->frag_off & htons(IP_DF) && 491 if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
478 skb->len + ihl > qp->q.max_size) 492 fragsize > qp->max_df_size)
479 qp->q.max_size = skb->len + ihl; 493 qp->max_df_size = fragsize;
480 494
481 if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 495 if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
482 qp->q.meat == qp->q.len) { 496 qp->q.meat == qp->q.len) {
@@ -606,13 +620,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
606 head->next = NULL; 620 head->next = NULL;
607 head->dev = dev; 621 head->dev = dev;
608 head->tstamp = qp->q.stamp; 622 head->tstamp = qp->q.stamp;
609 IPCB(head)->frag_max_size = qp->q.max_size; 623 IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
610 624
611 iph = ip_hdr(head); 625 iph = ip_hdr(head);
612 /* max_size != 0 implies at least one fragment had IP_DF set */
613 iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
614 iph->tot_len = htons(len); 626 iph->tot_len = htons(len);
615 iph->tos |= ecn; 627 iph->tos |= ecn;
628
629 /* When we set IP_DF on a refragmented skb we must also force a
630 * call to ip_fragment to avoid forwarding a DF-skb of size s while
631 * original sender only sent fragments of size f (where f < s).
632 *
633 * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
634 * frag seen to avoid sending tiny DF-fragments in case skb was built
635 * from one very small df-fragment and one large non-df frag.
636 */
637 if (qp->max_df_size == qp->q.max_size) {
638 IPCB(head)->flags |= IPSKB_FRAG_PMTU;
639 iph->frag_off = htons(IP_DF);
640 } else {
641 iph->frag_off = 0;
642 }
643
616 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 644 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
617 qp->q.fragments = NULL; 645 qp->q.fragments = NULL;
618 qp->q.fragments_tail = NULL; 646 qp->q.fragments_tail = NULL;
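Tracking max_df_size separately from q.max_size lets the reassembly code answer one question at the end: was the largest fragment seen a DF fragment? Only then do DF and IPSKB_FRAG_PMTU survive onto the rebuilt packet, as the comment in the hunk explains. In miniature, with invented struct and helper names:

#include <stdbool.h>
#include <stdio.h>

struct queue_model { unsigned int max_size, max_df_size; };

static void account(struct queue_model *q, unsigned int fragsize, bool df)
{
    if (fragsize > q->max_size)
        q->max_size = fragsize;
    if (df && fragsize > q->max_df_size)
        q->max_df_size = fragsize;
}

int main(void)
{
    struct queue_model q = { 0, 0 };

    account(&q, 1500, true);   /* big DF fragment  */
    account(&q, 60, false);    /* tiny non-DF tail */
    /* frag_max_size = max(max_df_size, max_size) = 1500, and DF is
     * re-set because the largest fragment seen carried it */
    printf("frag_max_size=%u df=%d\n",
           q.max_df_size > q.max_size ? q.max_df_size : q.max_size,
           q.max_df_size == q.max_size);  /* 1500 1 */
    return 0;
}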
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c65b93a7b711..6bf89a6312bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,6 +83,10 @@
83int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; 83int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
84EXPORT_SYMBOL(sysctl_ip_default_ttl); 84EXPORT_SYMBOL(sysctl_ip_default_ttl);
85 85
86static int ip_fragment(struct sock *sk, struct sk_buff *skb,
87 unsigned int mtu,
88 int (*output)(struct sock *, struct sk_buff *));
89
86/* Generate a checksum for an outgoing IP datagram. */ 90/* Generate a checksum for an outgoing IP datagram. */
87void ip_send_check(struct iphdr *iph) 91void ip_send_check(struct iphdr *iph)
88{ 92{
@@ -91,7 +95,7 @@ void ip_send_check(struct iphdr *iph)
91} 95}
92EXPORT_SYMBOL(ip_send_check); 96EXPORT_SYMBOL(ip_send_check);
93 97
94int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) 98static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
95{ 99{
96 struct iphdr *iph = ip_hdr(skb); 100 struct iphdr *iph = ip_hdr(skb);
97 101
@@ -168,7 +172,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
168} 172}
169EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 173EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
170 174
171static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb) 175static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
172{ 176{
173 struct dst_entry *dst = skb_dst(skb); 177 struct dst_entry *dst = skb_dst(skb);
174 struct rtable *rt = (struct rtable *)dst; 178 struct rtable *rt = (struct rtable *)dst;
@@ -216,7 +220,8 @@ static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
216 return -EINVAL; 220 return -EINVAL;
217} 221}
218 222
219static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) 223static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
224 unsigned int mtu)
220{ 225{
221 netdev_features_t features; 226 netdev_features_t features;
222 struct sk_buff *segs; 227 struct sk_buff *segs;
@@ -224,7 +229,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
224 229
225 /* common case: locally created skb or seglen is <= mtu */ 230 /* common case: locally created skb or seglen is <= mtu */
226 if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || 231 if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
227 skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) 232 skb_gso_network_seglen(skb) <= mtu)
228 return ip_finish_output2(sk, skb); 233 return ip_finish_output2(sk, skb);
229 234
230 /* Slowpath - GSO segment length is exceeding the dst MTU. 235 /* Slowpath - GSO segment length is exceeding the dst MTU.
@@ -248,7 +253,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
248 int err; 253 int err;
249 254
250 segs->next = NULL; 255 segs->next = NULL;
251 err = ip_fragment(sk, segs, ip_finish_output2); 256 err = ip_fragment(sk, segs, mtu, ip_finish_output2);
252 257
253 if (err && ret == 0) 258 if (err && ret == 0)
254 ret = err; 259 ret = err;
@@ -260,6 +265,8 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
260 265
261static int ip_finish_output(struct sock *sk, struct sk_buff *skb) 266static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
262{ 267{
268 unsigned int mtu;
269
263#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 270#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
264 /* Policy lookup after SNAT yielded a new policy */ 271 /* Policy lookup after SNAT yielded a new policy */
265 if (skb_dst(skb)->xfrm) { 272 if (skb_dst(skb)->xfrm) {
@@ -267,11 +274,12 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
267 return dst_output_sk(sk, skb); 274 return dst_output_sk(sk, skb);
268 } 275 }
269#endif 276#endif
277 mtu = ip_skb_dst_mtu(skb);
270 if (skb_is_gso(skb)) 278 if (skb_is_gso(skb))
271 return ip_finish_output_gso(sk, skb); 279 return ip_finish_output_gso(sk, skb, mtu);
272 280
273 if (skb->len > ip_skb_dst_mtu(skb)) 281 if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
274 return ip_fragment(sk, skb, ip_finish_output2); 282 return ip_fragment(sk, skb, mtu, ip_finish_output2);
275 283
276 return ip_finish_output2(sk, skb); 284 return ip_finish_output2(sk, skb);
277} 285}
@@ -478,6 +486,31 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
478 skb_copy_secmark(to, from); 486 skb_copy_secmark(to, from);
479} 487}
480 488
489static int ip_fragment(struct sock *sk, struct sk_buff *skb,
490 unsigned int mtu,
491 int (*output)(struct sock *, struct sk_buff *))
492{
493 struct iphdr *iph = ip_hdr(skb);
494
495 if ((iph->frag_off & htons(IP_DF)) == 0)
496 return ip_do_fragment(sk, skb, output);
497
498 if (unlikely(!skb->ignore_df ||
499 (IPCB(skb)->frag_max_size &&
500 IPCB(skb)->frag_max_size > mtu))) {
501 struct rtable *rt = skb_rtable(skb);
502 struct net_device *dev = rt->dst.dev;
503
504 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
505 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
506 htonl(mtu));
507 kfree_skb(skb);
508 return -EMSGSIZE;
509 }
510
511 return ip_do_fragment(sk, skb, output);
512}
513
481/* 514/*
482 * This IP datagram is too large to be sent in one piece. Break it up into 515 * This IP datagram is too large to be sent in one piece. Break it up into
483 * smaller pieces (each of size equal to IP header plus 516 * smaller pieces (each of size equal to IP header plus
@@ -485,8 +518,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
485 * single device frame, and queue such a frame for sending. 518 * single device frame, and queue such a frame for sending.
486 */ 519 */
487 520
488int ip_fragment(struct sock *sk, struct sk_buff *skb, 521int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
489 int (*output)(struct sock *, struct sk_buff *)) 522 int (*output)(struct sock *, struct sk_buff *))
490{ 523{
491 struct iphdr *iph; 524 struct iphdr *iph;
492 int ptr; 525 int ptr;
@@ -507,15 +540,8 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
507 iph = ip_hdr(skb); 540 iph = ip_hdr(skb);
508 541
509 mtu = ip_skb_dst_mtu(skb); 542 mtu = ip_skb_dst_mtu(skb);
510 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 543 if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
511 (IPCB(skb)->frag_max_size && 544 mtu = IPCB(skb)->frag_max_size;
512 IPCB(skb)->frag_max_size > mtu))) {
513 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
514 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
515 htonl(mtu));
516 kfree_skb(skb);
517 return -EMSGSIZE;
518 }
519 545
520 /* 546 /*
521 * Setup starting values. 547 * Setup starting values.
@@ -523,10 +549,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
523 549
524 hlen = iph->ihl * 4; 550 hlen = iph->ihl * 4;
525 mtu = mtu - hlen; /* Size of data space */ 551 mtu = mtu - hlen; /* Size of data space */
526#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
527 if (skb->nf_bridge)
528 mtu -= nf_bridge_mtu_reduction(skb);
529#endif
530 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; 552 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
531 553
532 /* When frag_list is given, use it. First, check its validity: 554 /* When frag_list is given, use it. First, check its validity:
@@ -711,6 +733,9 @@ slow_path:
711 iph = ip_hdr(skb2); 733 iph = ip_hdr(skb2);
712 iph->frag_off = htons((offset >> 3)); 734 iph->frag_off = htons((offset >> 3));
713 735
736 if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
737 iph->frag_off |= htons(IP_DF);
738
714 /* ANK: dirty, but effective trick. Upgrade options only if 739 /* ANK: dirty, but effective trick. Upgrade options only if
715 * the segment to be fragmented was THE FIRST (otherwise, 740 * the segment to be fragmented was THE FIRST (otherwise,
716 * options are already fixed) and make it ONCE 741 * options are already fixed) and make it ONCE
@@ -751,7 +776,7 @@ fail:
751 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 776 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
752 return err; 777 return err;
753} 778}
754EXPORT_SYMBOL(ip_fragment); 779EXPORT_SYMBOL(ip_do_fragment);
755 780
756int 781int
757ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) 782ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
@@ -1217,11 +1242,9 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1217 } 1242 }
1218 1243
1219 while (size > 0) { 1244 while (size > 0) {
1220 int i; 1245 if (skb_is_gso(skb)) {
1221
1222 if (skb_is_gso(skb))
1223 len = size; 1246 len = size;
1224 else { 1247 } else {
1225 1248
1226 /* Check if the remaining data fits into current packet. */ 1249 /* Check if the remaining data fits into current packet. */
1227 len = mtu - skb->len; 1250 len = mtu - skb->len;
@@ -1273,15 +1296,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1273 continue; 1296 continue;
1274 } 1297 }
1275 1298
1276 i = skb_shinfo(skb)->nr_frags;
1277 if (len > size) 1299 if (len > size)
1278 len = size; 1300 len = size;
1279 if (skb_can_coalesce(skb, i, page, offset)) { 1301
1280 skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len); 1302 if (skb_append_pagefrags(skb, page, offset, len)) {
1281 } else if (i < MAX_SKB_FRAGS) {
1282 get_page(page);
1283 skb_fill_page_desc(skb, i, page, offset, len);
1284 } else {
1285 err = -EMSGSIZE; 1303 err = -EMSGSIZE;
1286 goto error; 1304 goto error;
1287 } 1305 }
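In ip_append_page() above, the open-coded fragment coalescing is replaced by a single skb_append_pagefrags() call. A hedged sketch of what that helper is expected to do, reconstructed from the removed lines (illustrative _sketch name; the in-tree helper lives in net/core/skbuff.c):

    /* Coalesce with the last page fragment when possible, otherwise
     * grab a page reference and fill a new fragment slot, failing
     * once MAX_SKB_FRAGS slots are in use -- exactly the logic the
     * hunk above deletes from ip_append_page().
     */
    static int append_pagefrags_sketch(struct sk_buff *skb, struct page *page,
                                       int offset, int len)
    {
            int i = skb_shinfo(skb)->nr_frags;

            if (skb_can_coalesce(skb, i, page, offset)) {
                    skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], len);
            } else if (i < MAX_SKB_FRAGS) {
                    get_page(page);
                    skb_fill_page_desc(skb, i, page, offset, len);
            } else {
                    return -EMSGSIZE;
            }
            return 0;
    }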
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 7cfb0893f263..c3c359ad66e3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -432,6 +432,15 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
432 kfree_skb(skb); 432 kfree_skb(skb);
433} 433}
434 434
435/* For some errors we have valid addr_offset even with zero payload and
436 * zero port. Also, addr_offset should be supported if port is set.
437 */
438static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
439{
440 return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
441 serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
442}
443
435/* IPv4 supports cmsg on all icmp errors and some timestamps 444 /* IPv4 supports cmsg on all icmp errors and some timestamps
436 * 445 *
437 * Timestamp code paths do not initialize the fields expected by cmsg: 446 * Timestamp code paths do not initialize the fields expected by cmsg:
@@ -498,7 +507,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
498 507
499 serr = SKB_EXT_ERR(skb); 508 serr = SKB_EXT_ERR(skb);
500 509
501 if (sin && serr->port) { 510 if (sin && ipv4_datagram_support_addr(serr)) {
502 sin->sin_family = AF_INET; 511 sin->sin_family = AF_INET;
503 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + 512 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
504 serr->addr_offset); 513 serr->addr_offset);
@@ -582,6 +591,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
582 case IP_TRANSPARENT: 591 case IP_TRANSPARENT:
583 case IP_MINTTL: 592 case IP_MINTTL:
584 case IP_NODEFRAG: 593 case IP_NODEFRAG:
594 case IP_BIND_ADDRESS_NO_PORT:
585 case IP_UNICAST_IF: 595 case IP_UNICAST_IF:
586 case IP_MULTICAST_TTL: 596 case IP_MULTICAST_TTL:
587 case IP_MULTICAST_ALL: 597 case IP_MULTICAST_ALL:
@@ -732,6 +742,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
732 } 742 }
733 inet->nodefrag = val ? 1 : 0; 743 inet->nodefrag = val ? 1 : 0;
734 break; 744 break;
745 case IP_BIND_ADDRESS_NO_PORT:
746 inet->bind_address_no_port = val ? 1 : 0;
747 break;
735 case IP_MTU_DISCOVER: 748 case IP_MTU_DISCOVER:
736 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) 749 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
737 goto e_inval; 750 goto e_inval;
@@ -1324,6 +1337,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1324 case IP_NODEFRAG: 1337 case IP_NODEFRAG:
1325 val = inet->nodefrag; 1338 val = inet->nodefrag;
1326 break; 1339 break;
1340 case IP_BIND_ADDRESS_NO_PORT:
1341 val = inet->bind_address_no_port;
1342 break;
1327 case IP_MTU_DISCOVER: 1343 case IP_MTU_DISCOVER:
1328 val = inet->pmtudisc; 1344 val = inet->pmtudisc;
1329 break; 1345 break;
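The ip_sockglue.c hunks wire up the new IP_BIND_ADDRESS_NO_PORT option in both setsockopt and getsockopt. An illustrative userspace use, assuming a uapi header that defines the option (bind_addr_only is a made-up helper name):

    #include <netinet/in.h>
    #include <sys/socket.h>

    /* Bind to a source address without reserving an ephemeral port;
     * port selection is deferred until connect(), which avoids
     * exhausting the local port range when many sockets bind the
     * same source address before connecting.
     */
    static int bind_addr_only(int fd, const struct sockaddr_in *src)
    {
            int one = 1;

            if (setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT,
                           &one, sizeof(one)) < 0)
                    return -1;
            /* src->sin_port left at 0: nothing is reserved at bind() time */
            return bind(fd, (const struct sockaddr *)src, sizeof(*src));
    }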
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 4c2c3ba4ba65..626d9e56a6bd 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
586EXPORT_SYMBOL(ip_tunnel_encap); 586EXPORT_SYMBOL(ip_tunnel_encap);
587 587
588static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, 588static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
589 struct rtable *rt, __be16 df) 589 struct rtable *rt, __be16 df,
590 const struct iphdr *inner_iph)
590{ 591{
591 struct ip_tunnel *tunnel = netdev_priv(dev); 592 struct ip_tunnel *tunnel = netdev_priv(dev);
592 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; 593 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
@@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
603 604
604 if (skb->protocol == htons(ETH_P_IP)) { 605 if (skb->protocol == htons(ETH_P_IP)) {
605 if (!skb_is_gso(skb) && 606 if (!skb_is_gso(skb) &&
606 (df & htons(IP_DF)) && mtu < pkt_size) { 607 (inner_iph->frag_off & htons(IP_DF)) &&
608 mtu < pkt_size) {
607 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 609 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
608 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 610 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
609 return -E2BIG; 611 return -E2BIG;
@@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
737 goto tx_error; 739 goto tx_error;
738 } 740 }
739 741
740 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { 742 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
741 ip_rt_put(rt); 743 ip_rt_put(rt);
742 goto tx_error; 744 goto tx_error;
743 } 745 }
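tnl_update_pmtu() now keys its ICMP_FRAG_NEEDED decision off the inner IP header's DF bit rather than the df value from tnl_params, which reflects the tunnel's outer-header policy, not the original sender's intent. A sketch of the corrected predicate (illustrative name; operands as in the hunk):

    static bool inner_wants_frag_needed(const struct iphdr *inner_iph,
                                        bool gso, int mtu, int pkt_size)
    {
            /* only non-GSO packets whose *inner* header set DF and
             * which exceed the tunnel path MTU get the ICMP error */
            return !gso &&
                   (inner_iph->frag_off & htons(IP_DF)) &&
                   mtu < pkt_size;
    }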
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index ce63ab21b6cd..6a51a71a6c67 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -98,7 +98,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
98 return -ENOMEM; 98 return -ENOMEM;
99 99
100 eh = (struct ethhdr *)skb->data; 100 eh = (struct ethhdr *)skb->data;
101 if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) 101 if (likely(eth_proto_is_802_3(eh->h_proto)))
102 skb->protocol = eh->h_proto; 102 skb->protocol = eh->h_proto;
103 else 103 else
104 skb->protocol = htons(ETH_P_802_2); 104 skb->protocol = htons(ETH_P_802_2);
@@ -165,6 +165,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
165{ 165{
166 int i; 166 int i;
167 167
168 netdev_stats_to_stats64(tot, &dev->stats);
169
168 for_each_possible_cpu(i) { 170 for_each_possible_cpu(i) {
169 const struct pcpu_sw_netstats *tstats = 171 const struct pcpu_sw_netstats *tstats =
170 per_cpu_ptr(dev->tstats, i); 172 per_cpu_ptr(dev->tstats, i);
@@ -185,22 +187,6 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
185 tot->tx_bytes += tx_bytes; 187 tot->tx_bytes += tx_bytes;
186 } 188 }
187 189
188 tot->multicast = dev->stats.multicast;
189
190 tot->rx_crc_errors = dev->stats.rx_crc_errors;
191 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
192 tot->rx_length_errors = dev->stats.rx_length_errors;
193 tot->rx_frame_errors = dev->stats.rx_frame_errors;
194 tot->rx_errors = dev->stats.rx_errors;
195
196 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
197 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
198 tot->tx_dropped = dev->stats.tx_dropped;
199 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
200 tot->tx_errors = dev->stats.tx_errors;
201
202 tot->collisions = dev->stats.collisions;
203
204 return tot; 190 return tot;
205} 191}
206EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); 192EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
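ip_tunnel_get_stats64() above swaps a long hand-written field copy for one netdev_stats_to_stats64() call, then layers the per-cpu tstats on top. A hedged sketch of what the removed copy amounted to (a few fields shown; the in-tree helper in net/core/dev.c handles them generically):

    static void stats_to_stats64_sketch(struct rtnl_link_stats64 *tot,
                                        const struct net_device_stats *stats)
    {
            tot->multicast     = stats->multicast;
            tot->rx_crc_errors = stats->rx_crc_errors;
            tot->rx_errors     = stats->rx_errors;
            tot->tx_dropped    = stats->tx_dropped;
            tot->tx_errors     = stats->tx_errors;
            tot->collisions    = stats->collisions;
            /* ...remaining counters follow the same pattern... */
    }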
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ff96396ebec5..254238daf58b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -251,7 +251,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
251 return -EINVAL; 251 return -EINVAL;
252 } 252 }
253 253
254 p.i_key = p.o_key = p.i_flags = p.o_flags = 0; 254 p.i_key = p.o_key = 0;
255 p.i_flags = p.o_flags = 0;
255 if (p.iph.ttl) 256 if (p.iph.ttl)
256 p.iph.frag_off |= htons(IP_DF); 257 p.iph.frag_off |= htons(IP_DF);
257 258
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 65de0684e22a..61eafc9b4545 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -197,11 +197,4 @@ static int __init ipv4_netfilter_init(void)
197{ 197{
198 return nf_register_afinfo(&nf_ip_afinfo); 198 return nf_register_afinfo(&nf_ip_afinfo);
199} 199}
200 200subsys_initcall(ipv4_netfilter_init);
201static void __exit ipv4_netfilter_fini(void)
202{
203 nf_unregister_afinfo(&nf_ip_afinfo);
204}
205
206module_init(ipv4_netfilter_init);
207module_exit(ipv4_netfilter_fini);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fb20f363151f..2199a5db25e6 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -195,7 +195,8 @@ config IP_NF_MATCH_ECN
195 195
196config IP_NF_MATCH_RPFILTER 196config IP_NF_MATCH_RPFILTER
197 tristate '"rpfilter" reverse path filter match support' 197 tristate '"rpfilter" reverse path filter match support'
198 depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW) 198 depends on NETFILTER_ADVANCED
199 depends on IP_NF_MANGLE || IP_NF_RAW
199 ---help--- 200 ---help---
200 This option allows you to match packets whose replies would 201 This option allows you to match packets whose replies would
201 go out via the interface the packet came in. 202 go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a61200754f4b..92305a1a021a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
254 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 254 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
255 unsigned int verdict = NF_DROP; 255 unsigned int verdict = NF_DROP;
256 const struct arphdr *arp; 256 const struct arphdr *arp;
257 struct arpt_entry *e, *back; 257 struct arpt_entry *e, **jumpstack;
258 const char *indev, *outdev; 258 const char *indev, *outdev;
259 void *table_base; 259 const void *table_base;
260 unsigned int cpu, stackidx = 0;
260 const struct xt_table_info *private; 261 const struct xt_table_info *private;
261 struct xt_action_param acpar; 262 struct xt_action_param acpar;
262 unsigned int addend; 263 unsigned int addend;
@@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb,
270 local_bh_disable(); 271 local_bh_disable();
271 addend = xt_write_recseq_begin(); 272 addend = xt_write_recseq_begin();
272 private = table->private; 273 private = table->private;
274 cpu = smp_processor_id();
273 /* 275 /*
274 * Ensure we load private-> members after we've fetched the base 276 * Ensure we load private-> members after we've fetched the base
275 * pointer. 277 * pointer.
276 */ 278 */
277 smp_read_barrier_depends(); 279 smp_read_barrier_depends();
278 table_base = private->entries[smp_processor_id()]; 280 table_base = private->entries;
281 jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
279 282
280 e = get_entry(table_base, private->hook_entry[hook]); 283 e = get_entry(table_base, private->hook_entry[hook]);
281 back = get_entry(table_base, private->underflow[hook]);
282 284
283 acpar.in = state->in; 285 acpar.in = state->in;
284 acpar.out = state->out; 286 acpar.out = state->out;
@@ -289,13 +291,15 @@ unsigned int arpt_do_table(struct sk_buff *skb,
289 arp = arp_hdr(skb); 291 arp = arp_hdr(skb);
290 do { 292 do {
291 const struct xt_entry_target *t; 293 const struct xt_entry_target *t;
294 struct xt_counters *counter;
292 295
293 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 296 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
294 e = arpt_next_entry(e); 297 e = arpt_next_entry(e);
295 continue; 298 continue;
296 } 299 }
297 300
298 ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); 301 counter = xt_get_this_cpu_counter(&e->counters);
302 ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1);
299 303
300 t = arpt_get_target_c(e); 304 t = arpt_get_target_c(e);
301 305
@@ -310,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb,
310 verdict = (unsigned int)(-v) - 1; 314 verdict = (unsigned int)(-v) - 1;
311 break; 315 break;
312 } 316 }
313 e = back; 317 if (stackidx == 0) {
314 back = get_entry(table_base, back->comefrom); 318 e = get_entry(table_base,
319 private->underflow[hook]);
320 } else {
321 e = jumpstack[--stackidx];
322 e = arpt_next_entry(e);
323 }
315 continue; 324 continue;
316 } 325 }
317 if (table_base + v 326 if (table_base + v
318 != arpt_next_entry(e)) { 327 != arpt_next_entry(e)) {
319 /* Save old back ptr in next entry */
320 struct arpt_entry *next = arpt_next_entry(e);
321 next->comefrom = (void *)back - table_base;
322 328
323 /* set back pointer to next entry */ 329 if (stackidx >= private->stacksize) {
324 back = next; 330 verdict = NF_DROP;
331 break;
332 }
333 jumpstack[stackidx++] = e;
325 } 334 }
326 335
327 e = get_entry(table_base, v); 336 e = get_entry(table_base, v);
@@ -521,6 +530,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
521 if (ret) 530 if (ret)
522 return ret; 531 return ret;
523 532
533 e->counters.pcnt = xt_percpu_counter_alloc();
534 if (IS_ERR_VALUE(e->counters.pcnt))
535 return -ENOMEM;
536
524 t = arpt_get_target(e); 537 t = arpt_get_target(e);
525 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, 538 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
526 t->u.user.revision); 539 t->u.user.revision);
@@ -538,6 +551,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
538err: 551err:
539 module_put(t->u.kernel.target->me); 552 module_put(t->u.kernel.target->me);
540out: 553out:
554 xt_percpu_counter_free(e->counters.pcnt);
555
541 return ret; 556 return ret;
542} 557}
543 558
@@ -614,6 +629,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
614 if (par.target->destroy != NULL) 629 if (par.target->destroy != NULL)
615 par.target->destroy(&par); 630 par.target->destroy(&par);
616 module_put(par.target->me); 631 module_put(par.target->me);
632 xt_percpu_counter_free(e->counters.pcnt);
617} 633}
618 634
619/* Checks and translates the user-supplied table segment (held in 635/* Checks and translates the user-supplied table segment (held in
@@ -702,12 +718,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
702 return ret; 718 return ret;
703 } 719 }
704 720
705 /* And one copy for every other CPU */
706 for_each_possible_cpu(i) {
707 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
708 memcpy(newinfo->entries[i], entry0, newinfo->size);
709 }
710
711 return ret; 721 return ret;
712} 722}
713 723
@@ -722,14 +732,16 @@ static void get_counters(const struct xt_table_info *t,
722 seqcount_t *s = &per_cpu(xt_recseq, cpu); 732 seqcount_t *s = &per_cpu(xt_recseq, cpu);
723 733
724 i = 0; 734 i = 0;
725 xt_entry_foreach(iter, t->entries[cpu], t->size) { 735 xt_entry_foreach(iter, t->entries, t->size) {
736 struct xt_counters *tmp;
726 u64 bcnt, pcnt; 737 u64 bcnt, pcnt;
727 unsigned int start; 738 unsigned int start;
728 739
740 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
729 do { 741 do {
730 start = read_seqcount_begin(s); 742 start = read_seqcount_begin(s);
731 bcnt = iter->counters.bcnt; 743 bcnt = tmp->bcnt;
732 pcnt = iter->counters.pcnt; 744 pcnt = tmp->pcnt;
733 } while (read_seqcount_retry(s, start)); 745 } while (read_seqcount_retry(s, start));
734 746
735 ADD_COUNTER(counters[i], bcnt, pcnt); 747 ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -774,7 +786,7 @@ static int copy_entries_to_user(unsigned int total_size,
774 if (IS_ERR(counters)) 786 if (IS_ERR(counters))
775 return PTR_ERR(counters); 787 return PTR_ERR(counters);
776 788
777 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 789 loc_cpu_entry = private->entries;
778 /* ... then copy entire thing ... */ 790 /* ... then copy entire thing ... */
779 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 791 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
780 ret = -EFAULT; 792 ret = -EFAULT;
@@ -863,16 +875,16 @@ static int compat_table_info(const struct xt_table_info *info,
863 struct xt_table_info *newinfo) 875 struct xt_table_info *newinfo)
864{ 876{
865 struct arpt_entry *iter; 877 struct arpt_entry *iter;
866 void *loc_cpu_entry; 878 const void *loc_cpu_entry;
867 int ret; 879 int ret;
868 880
869 if (!newinfo || !info) 881 if (!newinfo || !info)
870 return -EINVAL; 882 return -EINVAL;
871 883
872 /* we don't care about newinfo->entries[] */ 884 /* we don't care about newinfo->entries */
873 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 885 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
874 newinfo->initial_entries = 0; 886 newinfo->initial_entries = 0;
875 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 887 loc_cpu_entry = info->entries;
876 xt_compat_init_offsets(NFPROTO_ARP, info->number); 888 xt_compat_init_offsets(NFPROTO_ARP, info->number);
877 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 889 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
878 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 890 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1037,7 +1049,7 @@ static int __do_replace(struct net *net, const char *name,
1037 get_counters(oldinfo, counters); 1049 get_counters(oldinfo, counters);
1038 1050
1039 /* Decrease module usage counts and free resource */ 1051 /* Decrease module usage counts and free resource */
1040 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1052 loc_cpu_old_entry = oldinfo->entries;
1041 xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) 1053 xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
1042 cleanup_entry(iter); 1054 cleanup_entry(iter);
1043 1055
@@ -1084,8 +1096,7 @@ static int do_replace(struct net *net, const void __user *user,
1084 if (!newinfo) 1096 if (!newinfo)
1085 return -ENOMEM; 1097 return -ENOMEM;
1086 1098
1087 /* choose the copy that is on our node/cpu */ 1099 loc_cpu_entry = newinfo->entries;
1088 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1089 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1100 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1090 tmp.size) != 0) { 1101 tmp.size) != 0) {
1091 ret = -EFAULT; 1102 ret = -EFAULT;
@@ -1115,7 +1126,7 @@ static int do_replace(struct net *net, const void __user *user,
1115static int do_add_counters(struct net *net, const void __user *user, 1126static int do_add_counters(struct net *net, const void __user *user,
1116 unsigned int len, int compat) 1127 unsigned int len, int compat)
1117{ 1128{
1118 unsigned int i, curcpu; 1129 unsigned int i;
1119 struct xt_counters_info tmp; 1130 struct xt_counters_info tmp;
1120 struct xt_counters *paddc; 1131 struct xt_counters *paddc;
1121 unsigned int num_counters; 1132 unsigned int num_counters;
@@ -1125,7 +1136,6 @@ static int do_add_counters(struct net *net, const void __user *user,
1125 struct xt_table *t; 1136 struct xt_table *t;
1126 const struct xt_table_info *private; 1137 const struct xt_table_info *private;
1127 int ret = 0; 1138 int ret = 0;
1128 void *loc_cpu_entry;
1129 struct arpt_entry *iter; 1139 struct arpt_entry *iter;
1130 unsigned int addend; 1140 unsigned int addend;
1131#ifdef CONFIG_COMPAT 1141#ifdef CONFIG_COMPAT
@@ -1181,12 +1191,13 @@ static int do_add_counters(struct net *net, const void __user *user,
1181 } 1191 }
1182 1192
1183 i = 0; 1193 i = 0;
1184 /* Choose the copy that is on our node */ 1194
1185 curcpu = smp_processor_id();
1186 loc_cpu_entry = private->entries[curcpu];
1187 addend = xt_write_recseq_begin(); 1195 addend = xt_write_recseq_begin();
1188 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1196 xt_entry_foreach(iter, private->entries, private->size) {
1189 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1197 struct xt_counters *tmp;
1198
1199 tmp = xt_get_this_cpu_counter(&iter->counters);
1200 ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
1190 ++i; 1201 ++i;
1191 } 1202 }
1192 xt_write_recseq_end(addend); 1203 xt_write_recseq_end(addend);
@@ -1396,7 +1407,7 @@ static int translate_compat_table(const char *name,
1396 newinfo->hook_entry[i] = info->hook_entry[i]; 1407 newinfo->hook_entry[i] = info->hook_entry[i];
1397 newinfo->underflow[i] = info->underflow[i]; 1408 newinfo->underflow[i] = info->underflow[i];
1398 } 1409 }
1399 entry1 = newinfo->entries[raw_smp_processor_id()]; 1410 entry1 = newinfo->entries;
1400 pos = entry1; 1411 pos = entry1;
1401 size = total_size; 1412 size = total_size;
1402 xt_entry_foreach(iter0, entry0, total_size) { 1413 xt_entry_foreach(iter0, entry0, total_size) {
@@ -1416,9 +1427,17 @@ static int translate_compat_table(const char *name,
1416 1427
1417 i = 0; 1428 i = 0;
1418 xt_entry_foreach(iter1, entry1, newinfo->size) { 1429 xt_entry_foreach(iter1, entry1, newinfo->size) {
1430 iter1->counters.pcnt = xt_percpu_counter_alloc();
1431 if (IS_ERR_VALUE(iter1->counters.pcnt)) {
1432 ret = -ENOMEM;
1433 break;
1434 }
1435
1419 ret = check_target(iter1, name); 1436 ret = check_target(iter1, name);
1420 if (ret != 0) 1437 if (ret != 0) {
1438 xt_percpu_counter_free(iter1->counters.pcnt);
1421 break; 1439 break;
1440 }
1422 ++i; 1441 ++i;
1423 if (strcmp(arpt_get_target(iter1)->u.user.name, 1442 if (strcmp(arpt_get_target(iter1)->u.user.name,
1424 XT_ERROR_TARGET) == 0) 1443 XT_ERROR_TARGET) == 0)
@@ -1448,11 +1467,6 @@ static int translate_compat_table(const char *name,
1448 return ret; 1467 return ret;
1449 } 1468 }
1450 1469
1451 /* And one copy for every other CPU */
1452 for_each_possible_cpu(i)
1453 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1454 memcpy(newinfo->entries[i], entry1, newinfo->size);
1455
1456 *pinfo = newinfo; 1470 *pinfo = newinfo;
1457 *pentry0 = entry1; 1471 *pentry0 = entry1;
1458 xt_free_table_info(info); 1472 xt_free_table_info(info);
@@ -1511,8 +1525,7 @@ static int compat_do_replace(struct net *net, void __user *user,
1511 if (!newinfo) 1525 if (!newinfo)
1512 return -ENOMEM; 1526 return -ENOMEM;
1513 1527
1514 /* choose the copy that is on our node/cpu */ 1528 loc_cpu_entry = newinfo->entries;
1515 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1516 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { 1529 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
1517 ret = -EFAULT; 1530 ret = -EFAULT;
1518 goto free_newinfo; 1531 goto free_newinfo;
@@ -1609,7 +1622,6 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1609 void __user *pos; 1622 void __user *pos;
1610 unsigned int size; 1623 unsigned int size;
1611 int ret = 0; 1624 int ret = 0;
1612 void *loc_cpu_entry;
1613 unsigned int i = 0; 1625 unsigned int i = 0;
1614 struct arpt_entry *iter; 1626 struct arpt_entry *iter;
1615 1627
@@ -1617,11 +1629,9 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1617 if (IS_ERR(counters)) 1629 if (IS_ERR(counters))
1618 return PTR_ERR(counters); 1630 return PTR_ERR(counters);
1619 1631
1620 /* choose the copy on our node/cpu */
1621 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1622 pos = userptr; 1632 pos = userptr;
1623 size = total_size; 1633 size = total_size;
1624 xt_entry_foreach(iter, loc_cpu_entry, total_size) { 1634 xt_entry_foreach(iter, private->entries, total_size) {
1625 ret = compat_copy_entry_to_user(iter, &pos, 1635 ret = compat_copy_entry_to_user(iter, &pos,
1626 &size, counters, i++); 1636 &size, counters, i++);
1627 if (ret != 0) 1637 if (ret != 0)
@@ -1790,8 +1800,7 @@ struct xt_table *arpt_register_table(struct net *net,
1790 goto out; 1800 goto out;
1791 } 1801 }
1792 1802
1793 /* choose the copy on our node/cpu */ 1803 loc_cpu_entry = newinfo->entries;
1794 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1795 memcpy(loc_cpu_entry, repl->entries, repl->size); 1804 memcpy(loc_cpu_entry, repl->entries, repl->size);
1796 1805
1797 ret = translate_table(newinfo, loc_cpu_entry, repl); 1806 ret = translate_table(newinfo, loc_cpu_entry, repl);
@@ -1822,7 +1831,7 @@ void arpt_unregister_table(struct xt_table *table)
1822 private = xt_unregister_table(table); 1831 private = xt_unregister_table(table);
1823 1832
1824 /* Decrease module usage counts and free resources */ 1833 /* Decrease module usage counts and free resources */
1825 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1834 loc_cpu_entry = private->entries;
1826 xt_entry_foreach(iter, loc_cpu_entry, private->size) 1835 xt_entry_foreach(iter, loc_cpu_entry, private->size)
1827 cleanup_entry(iter); 1836 cleanup_entry(iter);
1828 if (private->number > private->initial_entries) 1837 if (private->number > private->initial_entries)
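arpt_do_table() above drops the comefrom back-pointer scheme, which wrote into rule memory on the hot path and cannot work once all CPUs share a single table copy, in favour of a per-cpu jump stack like the one ip_tables already uses. A standalone model of the push/pop discipline (types and names invented for illustration):

    struct rule;                            /* opaque in this sketch */

    struct traversal {
            struct rule **jumpstack;        /* per-cpu scratch, stacksize deep */
            unsigned int stackidx;
            unsigned int stacksize;
    };

    /* XT_RETURN: resume after the most recent jump, or NULL to signal
     * a fall-through to the hook's underflow rule. */
    static struct rule *pop_return(struct traversal *t)
    {
            return t->stackidx ? t->jumpstack[--t->stackidx] : NULL;
    }

    /* Non-adjacent jump: remember where to come back to; a full stack
     * means the ruleset nests deeper than stacksize allows and the
     * packet is dropped, as in the hunk above. */
    static int push_jump(struct traversal *t, struct rule *from)
    {
            if (t->stackidx >= t->stacksize)
                    return -1;
            t->jumpstack[t->stackidx++] = from;
            return 0;
    }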
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2d0e265fef6e..6c72fbb7b49e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -254,15 +254,13 @@ static void trace_packet(const struct sk_buff *skb,
254 const struct xt_table_info *private, 254 const struct xt_table_info *private,
255 const struct ipt_entry *e) 255 const struct ipt_entry *e)
256{ 256{
257 const void *table_base;
258 const struct ipt_entry *root; 257 const struct ipt_entry *root;
259 const char *hookname, *chainname, *comment; 258 const char *hookname, *chainname, *comment;
260 const struct ipt_entry *iter; 259 const struct ipt_entry *iter;
261 unsigned int rulenum = 0; 260 unsigned int rulenum = 0;
262 struct net *net = dev_net(in ? in : out); 261 struct net *net = dev_net(in ? in : out);
263 262
264 table_base = private->entries[smp_processor_id()]; 263 root = get_entry(private->entries, private->hook_entry[hook]);
265 root = get_entry(table_base, private->hook_entry[hook]);
266 264
267 hookname = chainname = hooknames[hook]; 265 hookname = chainname = hooknames[hook];
268 comment = comments[NF_IP_TRACE_COMMENT_RULE]; 266 comment = comments[NF_IP_TRACE_COMMENT_RULE];
@@ -331,7 +329,7 @@ ipt_do_table(struct sk_buff *skb,
331 * pointer. 329 * pointer.
332 */ 330 */
333 smp_read_barrier_depends(); 331 smp_read_barrier_depends();
334 table_base = private->entries[cpu]; 332 table_base = private->entries;
335 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; 333 jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
336 stackptr = per_cpu_ptr(private->stackptr, cpu); 334 stackptr = per_cpu_ptr(private->stackptr, cpu);
337 origptr = *stackptr; 335 origptr = *stackptr;
@@ -345,6 +343,7 @@ ipt_do_table(struct sk_buff *skb,
345 do { 343 do {
346 const struct xt_entry_target *t; 344 const struct xt_entry_target *t;
347 const struct xt_entry_match *ematch; 345 const struct xt_entry_match *ematch;
346 struct xt_counters *counter;
348 347
349 IP_NF_ASSERT(e); 348 IP_NF_ASSERT(e);
350 if (!ip_packet_match(ip, indev, outdev, 349 if (!ip_packet_match(ip, indev, outdev,
@@ -361,7 +360,8 @@ ipt_do_table(struct sk_buff *skb,
361 goto no_match; 360 goto no_match;
362 } 361 }
363 362
364 ADD_COUNTER(e->counters, skb->len, 1); 363 counter = xt_get_this_cpu_counter(&e->counters);
364 ADD_COUNTER(*counter, skb->len, 1);
365 365
366 t = ipt_get_target(e); 366 t = ipt_get_target(e);
367 IP_NF_ASSERT(t->u.kernel.target); 367 IP_NF_ASSERT(t->u.kernel.target);
@@ -665,6 +665,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
665 if (ret) 665 if (ret)
666 return ret; 666 return ret;
667 667
668 e->counters.pcnt = xt_percpu_counter_alloc();
669 if (IS_ERR_VALUE(e->counters.pcnt))
670 return -ENOMEM;
671
668 j = 0; 672 j = 0;
669 mtpar.net = net; 673 mtpar.net = net;
670 mtpar.table = name; 674 mtpar.table = name;
@@ -691,6 +695,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
691 ret = check_target(e, net, name); 695 ret = check_target(e, net, name);
692 if (ret) 696 if (ret)
693 goto err; 697 goto err;
698
694 return 0; 699 return 0;
695 err: 700 err:
696 module_put(t->u.kernel.target->me); 701 module_put(t->u.kernel.target->me);
@@ -700,6 +705,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
700 break; 705 break;
701 cleanup_match(ematch, net); 706 cleanup_match(ematch, net);
702 } 707 }
708
709 xt_percpu_counter_free(e->counters.pcnt);
710
703 return ret; 711 return ret;
704} 712}
705 713
@@ -784,6 +792,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
784 if (par.target->destroy != NULL) 792 if (par.target->destroy != NULL)
785 par.target->destroy(&par); 793 par.target->destroy(&par);
786 module_put(par.target->me); 794 module_put(par.target->me);
795 xt_percpu_counter_free(e->counters.pcnt);
787} 796}
788 797
789/* Checks and translates the user-supplied table segment (held in 798/* Checks and translates the user-supplied table segment (held in
@@ -866,12 +875,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
866 return ret; 875 return ret;
867 } 876 }
868 877
869 /* And one copy for every other CPU */
870 for_each_possible_cpu(i) {
871 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
872 memcpy(newinfo->entries[i], entry0, newinfo->size);
873 }
874
875 return ret; 878 return ret;
876} 879}
877 880
@@ -887,14 +890,16 @@ get_counters(const struct xt_table_info *t,
887 seqcount_t *s = &per_cpu(xt_recseq, cpu); 890 seqcount_t *s = &per_cpu(xt_recseq, cpu);
888 891
889 i = 0; 892 i = 0;
890 xt_entry_foreach(iter, t->entries[cpu], t->size) { 893 xt_entry_foreach(iter, t->entries, t->size) {
894 struct xt_counters *tmp;
891 u64 bcnt, pcnt; 895 u64 bcnt, pcnt;
892 unsigned int start; 896 unsigned int start;
893 897
898 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
894 do { 899 do {
895 start = read_seqcount_begin(s); 900 start = read_seqcount_begin(s);
896 bcnt = iter->counters.bcnt; 901 bcnt = tmp->bcnt;
897 pcnt = iter->counters.pcnt; 902 pcnt = tmp->pcnt;
898 } while (read_seqcount_retry(s, start)); 903 } while (read_seqcount_retry(s, start));
899 904
900 ADD_COUNTER(counters[i], bcnt, pcnt); 905 ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -939,11 +944,7 @@ copy_entries_to_user(unsigned int total_size,
939 if (IS_ERR(counters)) 944 if (IS_ERR(counters))
940 return PTR_ERR(counters); 945 return PTR_ERR(counters);
941 946
942 /* choose the copy that is on our node/cpu, ... 947 loc_cpu_entry = private->entries;
943 * This choice is lazy (because current thread is
944 * allowed to migrate to another cpu)
945 */
946 loc_cpu_entry = private->entries[raw_smp_processor_id()];
947 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 948 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
948 ret = -EFAULT; 949 ret = -EFAULT;
949 goto free_counters; 950 goto free_counters;
@@ -1051,16 +1052,16 @@ static int compat_table_info(const struct xt_table_info *info,
1051 struct xt_table_info *newinfo) 1052 struct xt_table_info *newinfo)
1052{ 1053{
1053 struct ipt_entry *iter; 1054 struct ipt_entry *iter;
1054 void *loc_cpu_entry; 1055 const void *loc_cpu_entry;
1055 int ret; 1056 int ret;
1056 1057
1057 if (!newinfo || !info) 1058 if (!newinfo || !info)
1058 return -EINVAL; 1059 return -EINVAL;
1059 1060
1060 /* we don't care about newinfo->entries[] */ 1061 /* we don't care about newinfo->entries */
1061 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1062 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1062 newinfo->initial_entries = 0; 1063 newinfo->initial_entries = 0;
1063 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1064 loc_cpu_entry = info->entries;
1064 xt_compat_init_offsets(AF_INET, info->number); 1065 xt_compat_init_offsets(AF_INET, info->number);
1065 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1066 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1066 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1067 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1181,7 +1182,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1181 struct xt_table *t; 1182 struct xt_table *t;
1182 struct xt_table_info *oldinfo; 1183 struct xt_table_info *oldinfo;
1183 struct xt_counters *counters; 1184 struct xt_counters *counters;
1184 void *loc_cpu_old_entry;
1185 struct ipt_entry *iter; 1185 struct ipt_entry *iter;
1186 1186
1187 ret = 0; 1187 ret = 0;
@@ -1224,8 +1224,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1224 get_counters(oldinfo, counters); 1224 get_counters(oldinfo, counters);
1225 1225
1226 /* Decrease module usage counts and free resource */ 1226 /* Decrease module usage counts and free resource */
1227 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1227 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
1228 xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
1229 cleanup_entry(iter, net); 1228 cleanup_entry(iter, net);
1230 1229
1231 xt_free_table_info(oldinfo); 1230 xt_free_table_info(oldinfo);
@@ -1271,8 +1270,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1271 if (!newinfo) 1270 if (!newinfo)
1272 return -ENOMEM; 1271 return -ENOMEM;
1273 1272
1274 /* choose the copy that is on our node/cpu */ 1273 loc_cpu_entry = newinfo->entries;
1275 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1276 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1274 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1277 tmp.size) != 0) { 1275 tmp.size) != 0) {
1278 ret = -EFAULT; 1276 ret = -EFAULT;
@@ -1303,7 +1301,7 @@ static int
1303do_add_counters(struct net *net, const void __user *user, 1301do_add_counters(struct net *net, const void __user *user,
1304 unsigned int len, int compat) 1302 unsigned int len, int compat)
1305{ 1303{
1306 unsigned int i, curcpu; 1304 unsigned int i;
1307 struct xt_counters_info tmp; 1305 struct xt_counters_info tmp;
1308 struct xt_counters *paddc; 1306 struct xt_counters *paddc;
1309 unsigned int num_counters; 1307 unsigned int num_counters;
@@ -1313,7 +1311,6 @@ do_add_counters(struct net *net, const void __user *user,
1313 struct xt_table *t; 1311 struct xt_table *t;
1314 const struct xt_table_info *private; 1312 const struct xt_table_info *private;
1315 int ret = 0; 1313 int ret = 0;
1316 void *loc_cpu_entry;
1317 struct ipt_entry *iter; 1314 struct ipt_entry *iter;
1318 unsigned int addend; 1315 unsigned int addend;
1319#ifdef CONFIG_COMPAT 1316#ifdef CONFIG_COMPAT
@@ -1369,12 +1366,12 @@ do_add_counters(struct net *net, const void __user *user,
1369 } 1366 }
1370 1367
1371 i = 0; 1368 i = 0;
1372 /* Choose the copy that is on our node */
1373 curcpu = smp_processor_id();
1374 loc_cpu_entry = private->entries[curcpu];
1375 addend = xt_write_recseq_begin(); 1369 addend = xt_write_recseq_begin();
1376 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1370 xt_entry_foreach(iter, private->entries, private->size) {
1377 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1371 struct xt_counters *tmp;
1372
1373 tmp = xt_get_this_cpu_counter(&iter->counters);
1374 ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
1378 ++i; 1375 ++i;
1379 } 1376 }
1380 xt_write_recseq_end(addend); 1377 xt_write_recseq_end(addend);
@@ -1444,7 +1441,6 @@ static int
1444compat_find_calc_match(struct xt_entry_match *m, 1441compat_find_calc_match(struct xt_entry_match *m,
1445 const char *name, 1442 const char *name,
1446 const struct ipt_ip *ip, 1443 const struct ipt_ip *ip,
1447 unsigned int hookmask,
1448 int *size) 1444 int *size)
1449{ 1445{
1450 struct xt_match *match; 1446 struct xt_match *match;
@@ -1513,8 +1509,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1513 entry_offset = (void *)e - (void *)base; 1509 entry_offset = (void *)e - (void *)base;
1514 j = 0; 1510 j = 0;
1515 xt_ematch_foreach(ematch, e) { 1511 xt_ematch_foreach(ematch, e) {
1516 ret = compat_find_calc_match(ematch, name, 1512 ret = compat_find_calc_match(ematch, name, &e->ip, &off);
1517 &e->ip, e->comefrom, &off);
1518 if (ret != 0) 1513 if (ret != 0)
1519 goto release_matches; 1514 goto release_matches;
1520 ++j; 1515 ++j;
@@ -1610,6 +1605,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
1610 unsigned int j; 1605 unsigned int j;
1611 int ret = 0; 1606 int ret = 0;
1612 1607
1608 e->counters.pcnt = xt_percpu_counter_alloc();
1609 if (IS_ERR_VALUE(e->counters.pcnt))
1610 return -ENOMEM;
1611
1613 j = 0; 1612 j = 0;
1614 mtpar.net = net; 1613 mtpar.net = net;
1615 mtpar.table = name; 1614 mtpar.table = name;
@@ -1634,6 +1633,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
1634 break; 1633 break;
1635 cleanup_match(ematch, net); 1634 cleanup_match(ematch, net);
1636 } 1635 }
1636
1637 xt_percpu_counter_free(e->counters.pcnt);
1638
1637 return ret; 1639 return ret;
1638} 1640}
1639 1641
@@ -1718,7 +1720,7 @@ translate_compat_table(struct net *net,
1718 newinfo->hook_entry[i] = info->hook_entry[i]; 1720 newinfo->hook_entry[i] = info->hook_entry[i];
1719 newinfo->underflow[i] = info->underflow[i]; 1721 newinfo->underflow[i] = info->underflow[i];
1720 } 1722 }
1721 entry1 = newinfo->entries[raw_smp_processor_id()]; 1723 entry1 = newinfo->entries;
1722 pos = entry1; 1724 pos = entry1;
1723 size = total_size; 1725 size = total_size;
1724 xt_entry_foreach(iter0, entry0, total_size) { 1726 xt_entry_foreach(iter0, entry0, total_size) {
@@ -1770,11 +1772,6 @@ translate_compat_table(struct net *net,
1770 return ret; 1772 return ret;
1771 } 1773 }
1772 1774
1773 /* And one copy for every other CPU */
1774 for_each_possible_cpu(i)
1775 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1776 memcpy(newinfo->entries[i], entry1, newinfo->size);
1777
1778 *pinfo = newinfo; 1775 *pinfo = newinfo;
1779 *pentry0 = entry1; 1776 *pentry0 = entry1;
1780 xt_free_table_info(info); 1777 xt_free_table_info(info);
@@ -1821,8 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1821 if (!newinfo) 1818 if (!newinfo)
1822 return -ENOMEM; 1819 return -ENOMEM;
1823 1820
1824 /* choose the copy that is on our node/cpu */ 1821 loc_cpu_entry = newinfo->entries;
1825 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1826 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1822 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1827 tmp.size) != 0) { 1823 tmp.size) != 0) {
1828 ret = -EFAULT; 1824 ret = -EFAULT;
@@ -1893,7 +1889,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1893 void __user *pos; 1889 void __user *pos;
1894 unsigned int size; 1890 unsigned int size;
1895 int ret = 0; 1891 int ret = 0;
1896 const void *loc_cpu_entry;
1897 unsigned int i = 0; 1892 unsigned int i = 0;
1898 struct ipt_entry *iter; 1893 struct ipt_entry *iter;
1899 1894
@@ -1901,14 +1896,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1901 if (IS_ERR(counters)) 1896 if (IS_ERR(counters))
1902 return PTR_ERR(counters); 1897 return PTR_ERR(counters);
1903 1898
1904 /* choose the copy that is on our node/cpu, ...
1905 * This choice is lazy (because current thread is
1906 * allowed to migrate to another cpu)
1907 */
1908 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1909 pos = userptr; 1899 pos = userptr;
1910 size = total_size; 1900 size = total_size;
1911 xt_entry_foreach(iter, loc_cpu_entry, total_size) { 1901 xt_entry_foreach(iter, private->entries, total_size) {
1912 ret = compat_copy_entry_to_user(iter, &pos, 1902 ret = compat_copy_entry_to_user(iter, &pos,
1913 &size, counters, i++); 1903 &size, counters, i++);
1914 if (ret != 0) 1904 if (ret != 0)
@@ -2083,8 +2073,7 @@ struct xt_table *ipt_register_table(struct net *net,
2083 goto out; 2073 goto out;
2084 } 2074 }
2085 2075
2086 /* choose the copy on our node/cpu, but dont care about preemption */ 2076 loc_cpu_entry = newinfo->entries;
2087 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2088 memcpy(loc_cpu_entry, repl->entries, repl->size); 2077 memcpy(loc_cpu_entry, repl->entries, repl->size);
2089 2078
2090 ret = translate_table(net, newinfo, loc_cpu_entry, repl); 2079 ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2115,7 +2104,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table)
2115 private = xt_unregister_table(table); 2104 private = xt_unregister_table(table);
2116 2105
2117 /* Decrease module usage counts and free resources */ 2106 /* Decrease module usage counts and free resources */
2118 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 2107 loc_cpu_entry = private->entries;
2119 xt_entry_foreach(iter, loc_cpu_entry, private->size) 2108 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2120 cleanup_entry(iter, net); 2109 cleanup_entry(iter, net);
2121 if (private->number > private->initial_entries) 2110 if (private->number > private->initial_entries)
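Both arp_tables and ip_tables now keep xt_counters per cpu per rule: find_check_entry() calls xt_percpu_counter_alloc(), the packet path bumps only the local CPU's copy via xt_get_this_cpu_counter(), and cleanup_entry() frees it. A sketch of the matching reader, mirroring the reworked get_counters() above (illustrative function name):

    static void sum_rule_counters(struct xt_counters *rule,
                                  struct xt_counters *out)
    {
            unsigned int cpu;

            for_each_possible_cpu(cpu) {
                    seqcount_t *s = &per_cpu(xt_recseq, cpu);
                    struct xt_counters *tmp;
                    unsigned int start;
                    u64 bcnt, pcnt;

                    tmp = xt_get_per_cpu_counter(rule, cpu);
                    do {
                            start = read_seqcount_begin(s);
                            bcnt = tmp->bcnt;
                            pcnt = tmp->pcnt;
                    } while (read_seqcount_retry(s, start));

                    ADD_COUNTER(*out, bcnt, pcnt);
            }
    }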
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 771ab3d01ad3..45cb16a6a4a3 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -367,6 +367,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
367 struct clusterip_config *config; 367 struct clusterip_config *config;
368 int ret; 368 int ret;
369 369
370 if (par->nft_compat) {
371 pr_err("cannot use CLUSTERIP target from nftables compat\n");
372 return -EOPNOTSUPP;
373 }
374
370 if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && 375 if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
371 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && 376 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
372 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { 377 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index e9e67793055f..fe8cc183411e 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -18,7 +18,7 @@
18#include <net/netfilter/nf_conntrack_synproxy.h> 18#include <net/netfilter/nf_conntrack_synproxy.h>
19 19
20static struct iphdr * 20static struct iphdr *
21synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr) 21synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
22{ 22{
23 struct iphdr *iph; 23 struct iphdr *iph;
24 24
@@ -220,7 +220,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
220 nth->ack_seq = th->ack_seq; 220 nth->ack_seq = th->ack_seq;
221 tcp_flag_word(nth) = TCP_FLAG_ACK; 221 tcp_flag_word(nth) = TCP_FLAG_ACK;
222 nth->doff = tcp_hdr_size / 4; 222 nth->doff = tcp_hdr_size / 4;
223 nth->window = ntohs(htons(th->window) >> opts->wscale); 223 nth->window = htons(ntohs(th->window) >> opts->wscale);
224 nth->check = 0; 224 nth->check = 0;
225 nth->urg_ptr = 0; 225 nth->urg_ptr = 0;
226 226
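The SYNPROXY one-liner fixes the conversion order around the window-scale shift: the TCP window is big-endian on the wire, so it must be brought to host order before shifting and converted back afterwards. On little-endian the old expression happened to produce the same bytes (htons and ntohs perform the same swap there), but the endian annotations were inverted. A sketch of the corrected helper (illustrative name):

    static __be16 scale_window(__be16 wire, unsigned int wscale)
    {
            /* wire -> host, shift in host order, host -> wire */
            return htons(ntohs(wire) >> wscale);
    }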
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 4bfaedf9b34e..8618fd150c96 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
40 struct net *net = dev_net(dev); 40 struct net *net = dev_net(dev);
41 int ret __maybe_unused; 41 int ret __maybe_unused;
42 42
43 if (fib_lookup(net, fl4, &res)) 43 if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
44 return false; 44 return false;
45 45
46 if (res.type != RTN_UNICAST) { 46 if (res.type != RTN_UNICAST) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e1f3b911dd1e..da5d483e236a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = {
298 SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), 298 SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2),
299 SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), 299 SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT),
300 SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), 300 SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
301 SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE),
302 SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
301 SNMP_MIB_SENTINEL 303 SNMP_MIB_SENTINEL
302}; 304};
303 305
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f45f2a12f37b..d0362a2de3d3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -457,12 +457,9 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
457} 457}
458 458
459#define IP_IDENTS_SZ 2048u 459#define IP_IDENTS_SZ 2048u
460struct ip_ident_bucket {
461 atomic_t id;
462 u32 stamp32;
463};
464 460
465static struct ip_ident_bucket *ip_idents __read_mostly; 461static atomic_t *ip_idents __read_mostly;
462static u32 *ip_tstamps __read_mostly;
466 463
467/* In order to protect privacy, we add a perturbation to identifiers 464/* In order to protect privacy, we add a perturbation to identifiers
468 * if one generator is seldom used. This makes it hard for an attacker 465 * if one generator is seldom used. This makes it hard for an attacker
@@ -470,15 +467,16 @@ static struct ip_ident_bucket *ip_idents __read_mostly;
470 */ 467 */
471u32 ip_idents_reserve(u32 hash, int segs) 468u32 ip_idents_reserve(u32 hash, int segs)
472{ 469{
473 struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; 470 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
474 u32 old = ACCESS_ONCE(bucket->stamp32); 471 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
472 u32 old = ACCESS_ONCE(*p_tstamp);
475 u32 now = (u32)jiffies; 473 u32 now = (u32)jiffies;
476 u32 delta = 0; 474 u32 delta = 0;
477 475
478 if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) 476 if (old != now && cmpxchg(p_tstamp, old, now) == old)
479 delta = prandom_u32_max(now - old); 477 delta = prandom_u32_max(now - old);
480 478
481 return atomic_add_return(segs + delta, &bucket->id) - segs; 479 return atomic_add_return(segs + delta, p_id) - segs;
482} 480}
483EXPORT_SYMBOL(ip_idents_reserve); 481EXPORT_SYMBOL(ip_idents_reserve);
484 482
@@ -749,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
749 if (!(n->nud_state & NUD_VALID)) { 747 if (!(n->nud_state & NUD_VALID)) {
750 neigh_event_send(n, NULL); 748 neigh_event_send(n, NULL);
751 } else { 749 } else {
752 if (fib_lookup(net, fl4, &res) == 0) { 750 if (fib_lookup(net, fl4, &res, 0) == 0) {
753 struct fib_nh *nh = &FIB_RES_NH(res); 751 struct fib_nh *nh = &FIB_RES_NH(res);
754 752
755 update_or_create_fnhe(nh, fl4->daddr, new_gw, 753 update_or_create_fnhe(nh, fl4->daddr, new_gw,
@@ -977,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
977 return; 975 return;
978 976
979 rcu_read_lock(); 977 rcu_read_lock();
980 if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { 978 if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
981 struct fib_nh *nh = &FIB_RES_NH(res); 979 struct fib_nh *nh = &FIB_RES_NH(res);
982 980
983 update_or_create_fnhe(nh, fl4->daddr, 0, mtu, 981 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
@@ -1188,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1188 fl4.flowi4_mark = skb->mark; 1186 fl4.flowi4_mark = skb->mark;
1189 1187
1190 rcu_read_lock(); 1188 rcu_read_lock();
1191 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) 1189 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
1192 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); 1190 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1193 else 1191 else
1194 src = inet_select_addr(rt->dst.dev, 1192 src = inet_select_addr(rt->dst.dev,
@@ -1718,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1718 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 1716 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1719 fl4.daddr = daddr; 1717 fl4.daddr = daddr;
1720 fl4.saddr = saddr; 1718 fl4.saddr = saddr;
1721 err = fib_lookup(net, &fl4, &res); 1719 err = fib_lookup(net, &fl4, &res, 0);
1722 if (err != 0) { 1720 if (err != 0) {
1723 if (!IN_DEV_FORWARD(in_dev)) 1721 if (!IN_DEV_FORWARD(in_dev))
1724 err = -EHOSTUNREACH; 1722 err = -EHOSTUNREACH;
@@ -2097,7 +2095,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
2097 goto out; 2095 goto out;
2098 } 2096 }
2099 if (ipv4_is_local_multicast(fl4->daddr) || 2097 if (ipv4_is_local_multicast(fl4->daddr) ||
2100 ipv4_is_lbcast(fl4->daddr)) { 2098 ipv4_is_lbcast(fl4->daddr) ||
2099 fl4->flowi4_proto == IPPROTO_IGMP) {
2101 if (!fl4->saddr) 2100 if (!fl4->saddr)
2102 fl4->saddr = inet_select_addr(dev_out, 0, 2101 fl4->saddr = inet_select_addr(dev_out, 0,
2103 RT_SCOPE_LINK); 2102 RT_SCOPE_LINK);
@@ -2124,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
2124 goto make_route; 2123 goto make_route;
2125 } 2124 }
2126 2125
2127 if (fib_lookup(net, fl4, &res)) { 2126 if (fib_lookup(net, fl4, &res, 0)) {
2128 res.fi = NULL; 2127 res.fi = NULL;
2129 res.table = NULL; 2128 res.table = NULL;
2130 if (fl4->flowi4_oif) { 2129 if (fl4->flowi4_oif) {
@@ -2742,6 +2741,10 @@ int __init ip_rt_init(void)
2742 2741
2743 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); 2742 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
2744 2743
2744 ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
2745 if (!ip_tstamps)
2746 panic("IP: failed to allocate ip_tstamps\n");
2747
2745 for_each_possible_cpu(cpu) { 2748 for_each_possible_cpu(cpu) {
2746 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 2749 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
2747 2750
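The route.c changes split the ID-generator state into two parallel arrays, an atomic_t counter and a u32 timestamp per bucket, and thread a flags argument through fib_lookup(). A standalone model of the perturbed generator, mirroring ip_idents_reserve() above (illustrative name):

    /* If the bucket sat idle, advance it by a random amount bounded by
     * the idle time in jiffies on top of the segments requested, so a
     * rarely used generator does not expose a predictable sequence.
     */
    static u32 reserve_ids(atomic_t *p_id, u32 *p_tstamp, u32 now, int segs)
    {
            u32 old = ACCESS_ONCE(*p_tstamp);
            u32 delta = 0;

            if (old != now && cmpxchg(p_tstamp, old, now) == old)
                    delta = prandom_u32_max(now - old);

            return atomic_add_return(segs + delta, p_id) - segs;
    }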
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index df849e5a10f1..d70b1f603692 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
219} 219}
220EXPORT_SYMBOL_GPL(__cookie_v4_check); 220EXPORT_SYMBOL_GPL(__cookie_v4_check);
221 221
222static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 222struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
223 struct request_sock *req, 223 struct request_sock *req,
224 struct dst_entry *dst) 224 struct dst_entry *dst)
225{ 225{
226 struct inet_connection_sock *icsk = inet_csk(sk); 226 struct inet_connection_sock *icsk = inet_csk(sk);
227 struct sock *child; 227 struct sock *child;
@@ -235,7 +235,7 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
235 } 235 }
236 return child; 236 return child;
237} 237}
238 238EXPORT_SYMBOL(tcp_get_cookie_sock);
239 239
240/* 240/*
241 * when syncookies are in effect and tcp timestamps are enabled we stored 241 * when syncookies are in effect and tcp timestamps are enabled we stored
@@ -391,7 +391,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
391 ireq->rcv_wscale = rcv_wscale; 391 ireq->rcv_wscale = rcv_wscale;
392 ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); 392 ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
393 393
394 ret = get_cookie_sock(sk, skb, req, &rt->dst); 394 ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
395 /* ip_queue_xmit() depends on our flow being setup 395 /* ip_queue_xmit() depends on our flow being setup
396 * Normal sockets get it right from inet_csk_route_child_sock() 396 * Normal sockets get it right from inet_csk_route_child_sock()
397 */ 397 */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c3852a7ff3c7..433231ccfb17 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -41,11 +41,19 @@ static int tcp_syn_retries_min = 1;
41static int tcp_syn_retries_max = MAX_TCP_SYNCNT; 41static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
42static int ip_ping_group_range_min[] = { 0, 0 }; 42static int ip_ping_group_range_min[] = { 0, 0 };
43static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; 43static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
44static int min_sndbuf = SOCK_MIN_SNDBUF;
45static int min_rcvbuf = SOCK_MIN_RCVBUF;
44 46
45/* Update system visible IP port range */ 47/* Update system visible IP port range */
46static void set_local_port_range(struct net *net, int range[2]) 48static void set_local_port_range(struct net *net, int range[2])
47{ 49{
50 bool same_parity = !((range[0] ^ range[1]) & 1);
51
48 write_seqlock(&net->ipv4.ip_local_ports.lock); 52 write_seqlock(&net->ipv4.ip_local_ports.lock);
53 if (same_parity && !net->ipv4.ip_local_ports.warned) {
54 net->ipv4.ip_local_ports.warned = true;
55 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
56 }
49 net->ipv4.ip_local_ports.range[0] = range[0]; 57 net->ipv4.ip_local_ports.range[0] = range[0];
50 net->ipv4.ip_local_ports.range[1] = range[1]; 58 net->ipv4.ip_local_ports.range[1] = range[1];
51 write_sequnlock(&net->ipv4.ip_local_ports.lock); 59 write_sequnlock(&net->ipv4.ip_local_ports.lock);
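The same_parity test added above is the usual XOR trick: two integers share parity exactly when bit 0 of their XOR is clear. The warning matters because, in this series, the ephemeral-port allocator splits the range by parity (connect() favoring even ports, leaving odd ones to bind()), so a range whose endpoints share parity skews that split. A stand-alone sketch of the check, with illustrative values:

#include <stdbool.h>
#include <stdio.h>

/* bit 0 of a ^ b is clear iff a and b are both even or both odd */
static bool same_parity(int a, int b)
{
        return !((a ^ b) & 1);
}

int main(void)
{
        printf("%d\n", same_parity(32768, 60999)); /* 0: mixed parity, no warning */
        printf("%d\n", same_parity(32768, 61000)); /* 1: both even, warning fires */
        return 0;
}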
@@ -522,7 +530,7 @@ static struct ctl_table ipv4_table[] = {
522 .maxlen = sizeof(sysctl_tcp_wmem), 530 .maxlen = sizeof(sysctl_tcp_wmem),
523 .mode = 0644, 531 .mode = 0644,
524 .proc_handler = proc_dointvec_minmax, 532 .proc_handler = proc_dointvec_minmax,
525 .extra1 = &one, 533 .extra1 = &min_sndbuf,
526 }, 534 },
527 { 535 {
528 .procname = "tcp_notsent_lowat", 536 .procname = "tcp_notsent_lowat",
@@ -537,7 +545,7 @@ static struct ctl_table ipv4_table[] = {
537 .maxlen = sizeof(sysctl_tcp_rmem), 545 .maxlen = sizeof(sysctl_tcp_rmem),
538 .mode = 0644, 546 .mode = 0644,
539 .proc_handler = proc_dointvec_minmax, 547 .proc_handler = proc_dointvec_minmax,
540 .extra1 = &one, 548 .extra1 = &min_rcvbuf,
541 }, 549 },
542 { 550 {
543 .procname = "tcp_app_win", 551 .procname = "tcp_app_win",
@@ -702,7 +710,7 @@ static struct ctl_table ipv4_table[] = {
702 .maxlen = sizeof(int), 710 .maxlen = sizeof(int),
703 .mode = 0644, 711 .mode = 0644,
704 .proc_handler = proc_dointvec_minmax, 712 .proc_handler = proc_dointvec_minmax,
705 .extra1 = &zero, 713 .extra1 = &one,
706 .extra2 = &gso_max_segs, 714 .extra2 = &gso_max_segs,
707 }, 715 },
708 { 716 {
@@ -750,7 +758,7 @@ static struct ctl_table ipv4_table[] = {
750 .maxlen = sizeof(sysctl_udp_rmem_min), 758 .maxlen = sizeof(sysctl_udp_rmem_min),
751 .mode = 0644, 759 .mode = 0644,
752 .proc_handler = proc_dointvec_minmax, 760 .proc_handler = proc_dointvec_minmax,
753 .extra1 = &one 761 .extra1 = &min_rcvbuf,
754 }, 762 },
755 { 763 {
756 .procname = "udp_wmem_min", 764 .procname = "udp_wmem_min",
@@ -758,7 +766,7 @@ static struct ctl_table ipv4_table[] = {
758 .maxlen = sizeof(sysctl_udp_wmem_min), 766 .maxlen = sizeof(sysctl_udp_wmem_min),
759 .mode = 0644, 767 .mode = 0644,
760 .proc_handler = proc_dointvec_minmax, 768 .proc_handler = proc_dointvec_minmax,
761 .extra1 = &one 769 .extra1 = &min_sndbuf,
762 }, 770 },
763 { } 771 { }
764}; 772};
@@ -821,6 +829,13 @@ static struct ctl_table ipv4_net_table[] = {
821 .proc_handler = proc_dointvec 829 .proc_handler = proc_dointvec
822 }, 830 },
823 { 831 {
832 .procname = "tcp_ecn_fallback",
833 .data = &init_net.ipv4.sysctl_tcp_ecn_fallback,
834 .maxlen = sizeof(int),
835 .mode = 0644,
836 .proc_handler = proc_dointvec
837 },
838 {
824 .procname = "ip_local_port_range", 839 .procname = "ip_local_port_range",
825 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), 840 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
826 .data = &init_net.ipv4.ip_local_ports.range, 841 .data = &init_net.ipv4.ip_local_ports.range,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1377f2a0472..7f4056785acc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
695 struct tcp_splice_state *tss = rd_desc->arg.data; 695 struct tcp_splice_state *tss = rd_desc->arg.data;
696 int ret; 696 int ret;
697 697
698 ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), 698 ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
699 tss->flags); 699 min(rd_desc->count, len), tss->flags,
700 skb_socket_splice);
700 if (ret > 0) 701 if (ret > 0)
701 rd_desc->count -= ret; 702 rd_desc->count -= ret;
702 return ret; 703 return ret;
@@ -809,16 +810,28 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
809} 810}
810EXPORT_SYMBOL(tcp_splice_read); 811EXPORT_SYMBOL(tcp_splice_read);
811 812
812struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) 813struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
814 bool force_schedule)
813{ 815{
814 struct sk_buff *skb; 816 struct sk_buff *skb;
815 817
816 /* The TCP header must be at least 32-bit aligned. */ 818 /* The TCP header must be at least 32-bit aligned. */
817 size = ALIGN(size, 4); 819 size = ALIGN(size, 4);
818 820
821 if (unlikely(tcp_under_memory_pressure(sk)))
822 sk_mem_reclaim_partial(sk);
823
819 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); 824 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
820 if (skb) { 825 if (likely(skb)) {
821 if (sk_wmem_schedule(sk, skb->truesize)) { 826 bool mem_scheduled;
827
828 if (force_schedule) {
829 mem_scheduled = true;
830 sk_forced_mem_schedule(sk, skb->truesize);
831 } else {
832 mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
833 }
834 if (likely(mem_scheduled)) {
822 skb_reserve(skb, sk->sk_prot->max_header); 835 skb_reserve(skb, sk->sk_prot->max_header);
823 /* 836 /*
824 * Make sure that we have exactly size bytes 837 * Make sure that we have exactly size bytes
@@ -908,7 +921,8 @@ new_segment:
908 if (!sk_stream_memory_free(sk)) 921 if (!sk_stream_memory_free(sk))
909 goto wait_for_sndbuf; 922 goto wait_for_sndbuf;
910 923
911 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); 924 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
925 skb_queue_empty(&sk->sk_write_queue));
912 if (!skb) 926 if (!skb)
913 goto wait_for_memory; 927 goto wait_for_memory;
914 928
@@ -987,6 +1001,9 @@ do_error:
987 if (copied) 1001 if (copied)
988 goto out; 1002 goto out;
989out_err: 1003out_err:
1004 /* make sure we wake any epoll edge trigger waiter */
1005 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1006 sk->sk_write_space(sk);
990 return sk_stream_error(sk, flags, err); 1007 return sk_stream_error(sk, flags, err);
991} 1008}
992 1009
@@ -1144,7 +1161,8 @@ new_segment:
1144 1161
1145 skb = sk_stream_alloc_skb(sk, 1162 skb = sk_stream_alloc_skb(sk,
1146 select_size(sk, sg), 1163 select_size(sk, sg),
1147 sk->sk_allocation); 1164 sk->sk_allocation,
1165 skb_queue_empty(&sk->sk_write_queue));
1148 if (!skb) 1166 if (!skb)
1149 goto wait_for_memory; 1167 goto wait_for_memory;
1150 1168
@@ -1275,6 +1293,9 @@ do_error:
1275 goto out; 1293 goto out;
1276out_err: 1294out_err:
1277 err = sk_stream_error(sk, flags, err); 1295 err = sk_stream_error(sk, flags, err);
1296 /* make sure we wake any epoll edge trigger waiter */
1297 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1298 sk->sk_write_space(sk);
1278 release_sock(sk); 1299 release_sock(sk);
1279 return err; 1300 return err;
1280} 1301}
@@ -2483,6 +2504,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2483 icsk->icsk_syn_retries = val; 2504 icsk->icsk_syn_retries = val;
2484 break; 2505 break;
2485 2506
2507 case TCP_SAVE_SYN:
2508 if (val < 0 || val > 1)
2509 err = -EINVAL;
2510 else
2511 tp->save_syn = val;
2512 break;
2513
2486 case TCP_LINGER2: 2514 case TCP_LINGER2:
2487 if (val < 0) 2515 if (val < 0)
2488 tp->linger2 = -1; 2516 tp->linger2 = -1;
@@ -2545,10 +2573,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2545 2573
2546 case TCP_FASTOPEN: 2574 case TCP_FASTOPEN:
2547 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | 2575 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
2548 TCPF_LISTEN))) 2576 TCPF_LISTEN))) {
2577 tcp_fastopen_init_key_once(true);
2578
2549 err = fastopen_init_queue(sk, val); 2579 err = fastopen_init_queue(sk, val);
2550 else 2580 } else {
2551 err = -EINVAL; 2581 err = -EINVAL;
2582 }
2552 break; 2583 break;
2553 case TCP_TIMESTAMP: 2584 case TCP_TIMESTAMP:
2554 if (!tp->repair) 2585 if (!tp->repair)
@@ -2596,13 +2627,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
2596/* Return information about state of tcp endpoint in API format. */ 2627/* Return information about state of tcp endpoint in API format. */
2597void tcp_get_info(struct sock *sk, struct tcp_info *info) 2628void tcp_get_info(struct sock *sk, struct tcp_info *info)
2598{ 2629{
2599 const struct tcp_sock *tp = tcp_sk(sk); 2630 const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
2600 const struct inet_connection_sock *icsk = inet_csk(sk); 2631 const struct inet_connection_sock *icsk = inet_csk(sk);
2601 u32 now = tcp_time_stamp; 2632 u32 now = tcp_time_stamp;
2602 unsigned int start; 2633 unsigned int start;
2603 u32 rate; 2634 u32 rate;
2604 2635
2605 memset(info, 0, sizeof(*info)); 2636 memset(info, 0, sizeof(*info));
2637 if (sk->sk_type != SOCK_STREAM)
2638 return;
2606 2639
2607 info->tcpi_state = sk->sk_state; 2640 info->tcpi_state = sk->sk_state;
2608 info->tcpi_ca_state = icsk->icsk_ca_state; 2641 info->tcpi_ca_state = icsk->icsk_ca_state;
@@ -2672,6 +2705,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2672 info->tcpi_bytes_acked = tp->bytes_acked; 2705 info->tcpi_bytes_acked = tp->bytes_acked;
2673 info->tcpi_bytes_received = tp->bytes_received; 2706 info->tcpi_bytes_received = tp->bytes_received;
2674 } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); 2707 } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
2708 info->tcpi_segs_out = tp->segs_out;
2709 info->tcpi_segs_in = tp->segs_in;
2675} 2710}
2676EXPORT_SYMBOL_GPL(tcp_get_info); 2711EXPORT_SYMBOL_GPL(tcp_get_info);
2677 2712
@@ -2821,6 +2856,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2821 case TCP_NOTSENT_LOWAT: 2856 case TCP_NOTSENT_LOWAT:
2822 val = tp->notsent_lowat; 2857 val = tp->notsent_lowat;
2823 break; 2858 break;
2859 case TCP_SAVE_SYN:
2860 val = tp->save_syn;
2861 break;
2862 case TCP_SAVED_SYN: {
2863 if (get_user(len, optlen))
2864 return -EFAULT;
2865
2866 lock_sock(sk);
2867 if (tp->saved_syn) {
2868 if (len < tp->saved_syn[0]) {
2869 if (put_user(tp->saved_syn[0], optlen)) {
2870 release_sock(sk);
2871 return -EFAULT;
2872 }
2873 release_sock(sk);
2874 return -EINVAL;
2875 }
2876 len = tp->saved_syn[0];
2877 if (put_user(len, optlen)) {
2878 release_sock(sk);
2879 return -EFAULT;
2880 }
2881 if (copy_to_user(optval, tp->saved_syn + 1, len)) {
2882 release_sock(sk);
2883 return -EFAULT;
2884 }
2885 tcp_saved_syn_free(tp);
2886 release_sock(sk);
2887 } else {
2888 release_sock(sk);
2889 len = 0;
2890 if (put_user(len, optlen))
2891 return -EFAULT;
2892 }
2893 return 0;
2894 }
2824 default: 2895 default:
2825 return -ENOPROTOOPT; 2896 return -ENOPROTOOPT;
2826 } 2897 }
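The TCP_SAVED_SYN case above fixes a small ABI: tp->saved_syn is a length-prefixed blob, saved_syn[0] holding the header length with the raw network and TCP headers following (built by tcp_reqsk_record_syn in the tcp_input.c hunk further down), and a successful read frees the blob, so it can be fetched exactly once. A hedged user-space sketch; TCP_SAVE_SYN/TCP_SAVED_SYN are the option names introduced by this series and may need local defines on older headers:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

/* Call on an accepted socket whose listener had TCP_SAVE_SYN set to 1
 * before the connection arrived. */
static void dump_saved_syn(int cfd)
{
        unsigned char hdrs[512];        /* IP + TCP headers incl. options */
        socklen_t len = sizeof(hdrs);

        /* Were len smaller than the stored headers, the kernel would write
         * the required size back through optlen and fail with EINVAL; after
         * one successful read the blob is freed and len comes back as 0. */
        if (getsockopt(cfd, IPPROTO_TCP, TCP_SAVED_SYN, hdrs, &len) == 0)
                printf("saved SYN headers: %u bytes\n", (unsigned)len);
        else
                perror("TCP_SAVED_SYN");
}

The listener side is a single setsockopt(lfd, IPPROTO_TCP, TCP_SAVE_SYN, &(int){1}, sizeof(int)) before accept().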
@@ -3025,11 +3096,12 @@ __setup("thash_entries=", set_thash_entries);
3025 3096
3026static void __init tcp_init_mem(void) 3097static void __init tcp_init_mem(void)
3027{ 3098{
3028 unsigned long limit = nr_free_buffer_pages() / 8; 3099 unsigned long limit = nr_free_buffer_pages() / 16;
3100
3029 limit = max(limit, 128UL); 3101 limit = max(limit, 128UL);
3030 sysctl_tcp_mem[0] = limit / 4 * 3; 3102 sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */
3031 sysctl_tcp_mem[1] = limit; 3103 sysctl_tcp_mem[1] = limit; /* 6.25 % */
3032 sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; 3104 sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
3033} 3105}
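The halved divisor is easy to sanity-check. Assuming nr_free_buffer_pages() reports 1,048,576 pages (4 GB of 4 KB pages; real values run somewhat lower):

        limit      = 1048576 / 16  = 65536 pages
        tcp_mem[0] = 65536 / 4 * 3 = 49152 pages  (~4.68 % of RAM)
        tcp_mem[1] = 65536 pages                  (~6.25 %)
        tcp_mem[2] = 49152 * 2     = 98304 pages  (~9.37 %)

which matches the new percentage comments; the previous /8 divisor set all three pressure thresholds twice as high.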
3034 3106
3035void __init tcp_init(void) 3107void __init tcp_init(void)
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
new file mode 100644
index 000000000000..8c6fd3d5e40f
--- /dev/null
+++ b/net/ipv4/tcp_cdg.c
@@ -0,0 +1,433 @@
1/*
2 * CAIA Delay-Gradient (CDG) congestion control
3 *
4 * This implementation is based on the paper:
5 * D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
6 * delay gradients." In IFIP Networking, pages 328-341. Springer, 2011.
7 *
8 * Scavenger traffic (Less-than-Best-Effort) should disable coexistence
9 * heuristics using parameters use_shadow=0 and use_ineff=0.
10 *
11 * Parameters window, backoff_beta, and backoff_factor are crucial for
12 * throughput and delay. Future work is needed to determine better defaults,
13 * and to provide guidelines for use in different environments/contexts.
14 *
15 * Except for window, knobs are configured via /sys/module/tcp_cdg/parameters/.
16 * Parameter window is only configurable when loading tcp_cdg as a module.
17 *
18 * Notable differences from paper/FreeBSD:
19 * o Using Hybrid Slow start and Proportional Rate Reduction.
20 * o Add toggle for shadow window mechanism. Suggested by David Hayes.
21 * o Add toggle for non-congestion loss tolerance.
22 * o Scaling parameter G is changed to a backoff factor;
23 * conversion is given by: backoff_factor = 1000/(G * window).
24 * o Limit shadow window to 2 * cwnd, or to cwnd when application limited.
25 * o More accurate e^-x.
26 */
27#include <linux/kernel.h>
28#include <linux/random.h>
29#include <linux/module.h>
30#include <net/tcp.h>
31
32#define HYSTART_ACK_TRAIN 1
33#define HYSTART_DELAY 2
34
35static int window __read_mostly = 8;
36static unsigned int backoff_beta __read_mostly = 0.7071 * 1024; /* sqrt 0.5 */
37static unsigned int backoff_factor __read_mostly = 42;
38static unsigned int hystart_detect __read_mostly = 3;
39static unsigned int use_ineff __read_mostly = 5;
40static bool use_shadow __read_mostly = true;
41static bool use_tolerance __read_mostly;
42
43module_param(window, int, 0444);
44MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)");
45module_param(backoff_beta, uint, 0644);
46MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)");
47module_param(backoff_factor, uint, 0644);
48MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor");
49module_param(hystart_detect, uint, 0644);
50MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start "
51 "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)");
52module_param(use_ineff, uint, 0644);
53MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)");
54module_param(use_shadow, bool, 0644);
55MODULE_PARM_DESC(use_shadow, "use shadow window heuristic");
56module_param(use_tolerance, bool, 0644);
57MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic");
58
59struct minmax {
60 union {
61 struct {
62 s32 min;
63 s32 max;
64 };
65 u64 v64;
66 };
67};
68
69enum cdg_state {
70 CDG_UNKNOWN = 0,
71 CDG_NONFULL = 1,
72 CDG_FULL = 2,
73 CDG_BACKOFF = 3,
74};
75
76struct cdg {
77 struct minmax rtt;
78 struct minmax rtt_prev;
79 struct minmax *gradients;
80 struct minmax gsum;
81 bool gfilled;
82 u8 tail;
83 u8 state;
84 u8 delack;
85 u32 rtt_seq;
86 u32 undo_cwnd;
87 u32 shadow_wnd;
88 u16 backoff_cnt;
89 u16 sample_cnt;
90 s32 delay_min;
91 u32 last_ack;
92 u32 round_start;
93};
94
95/**
96 * nexp_u32 - negative base-e exponential
97 * @ux: x in units of micro
98 *
99 * Returns exp(ux * -1e-6) * U32_MAX.
100 */
101static u32 __pure nexp_u32(u32 ux)
102{
103 static const u16 v[] = {
104 /* exp(-x)*65536-1 for x = 0, 0.000256, 0.000512, ... */
105 65535,
106 65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422,
107 61378, 57484, 50423, 38795, 22965, 8047, 987, 14,
108 };
109 u32 msb = ux >> 8;
110 u32 res;
111 int i;
112
113 /* Cut off when ux >= 2^24 (actual result is <= 222/U32_MAX). */
114 if (msb > U16_MAX)
115 return 0;
116
117 /* Scale first eight bits linearly: */
118 res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000);
119
120 /* Obtain e^(x + y + ...) by computing e^x * e^y * ...: */
121 for (i = 1; msb; i++, msb >>= 1) {
122 u32 y = v[i & -(msb & 1)] + U32_C(1);
123
124 res = ((u64)res * y) >> 16;
125 }
126
127 return res;
128}
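nexp_u32() is self-contained, so the approximation is easy to validate in user space against libm. A verification sketch (not kernel code) with the kernel types mapped onto stdint equivalents:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define U16_MAX 0xffffU
#define U32_MAX 0xffffffffU

/* Body copied from above: exp(-x)*65536-1 table, product of partial
 * exponentials for the high bits, linear interpolation for the low byte. */
static uint32_t nexp_u32(uint32_t ux)
{
        static const uint16_t v[] = {
                65535,
                65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422,
                61378, 57484, 50423, 38795, 22965, 8047, 987, 14,
        };
        uint32_t msb = ux >> 8;
        uint32_t res;
        int i;

        if (msb > U16_MAX)
                return 0;

        res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000);

        for (i = 1; msb; i++, msb >>= 1) {
                uint32_t y = v[i & -(msb & 1)] + 1U;

                res = ((uint64_t)res * y) >> 16;
        }
        return res;
}

int main(void)
{
        uint32_t ux;

        for (ux = 1; ux < (1U << 24); ux <<= 1)
                printf("ux=%8u approx=%.6f exact=%.6f\n", ux,
                       nexp_u32(ux) / (double)U32_MAX, exp(ux * -1e-6));
        return 0;
}

Compiled with -lm, the two columns should track each other closely across the whole range, which is ample precision for a backoff probability.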
129
130/* Based on the HyStart algorithm (by Ha et al.) that is implemented in
131 * tcp_cubic. Differences/experimental changes:
132 * o Using Hayes' delayed ACK filter.
133 * o Using a usec clock for the ACK train.
134 * o Reset ACK train when application limited.
135 * o Invoked at any cwnd (i.e. also when cwnd < 16).
136 * o Invoked only when cwnd < ssthresh (i.e. not when cwnd == ssthresh).
137 */
138static void tcp_cdg_hystart_update(struct sock *sk)
139{
140 struct cdg *ca = inet_csk_ca(sk);
141 struct tcp_sock *tp = tcp_sk(sk);
142
143 ca->delay_min = min_not_zero(ca->delay_min, ca->rtt.min);
144 if (ca->delay_min == 0)
145 return;
146
147 if (hystart_detect & HYSTART_ACK_TRAIN) {
148 u32 now_us = div_u64(local_clock(), NSEC_PER_USEC);
149
150 if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) {
151 ca->last_ack = now_us;
152 ca->round_start = now_us;
153 } else if (before(now_us, ca->last_ack + 3000)) {
154 u32 base_owd = max(ca->delay_min / 2U, 125U);
155
156 ca->last_ack = now_us;
157 if (after(now_us, ca->round_start + base_owd)) {
158 NET_INC_STATS_BH(sock_net(sk),
159 LINUX_MIB_TCPHYSTARTTRAINDETECT);
160 NET_ADD_STATS_BH(sock_net(sk),
161 LINUX_MIB_TCPHYSTARTTRAINCWND,
162 tp->snd_cwnd);
163 tp->snd_ssthresh = tp->snd_cwnd;
164 return;
165 }
166 }
167 }
168
169 if (hystart_detect & HYSTART_DELAY) {
170 if (ca->sample_cnt < 8) {
171 ca->sample_cnt++;
172 } else {
173 s32 thresh = max(ca->delay_min + ca->delay_min / 8U,
174 125U);
175
176 if (ca->rtt.min > thresh) {
177 NET_INC_STATS_BH(sock_net(sk),
178 LINUX_MIB_TCPHYSTARTDELAYDETECT);
179 NET_ADD_STATS_BH(sock_net(sk),
180 LINUX_MIB_TCPHYSTARTDELAYCWND,
181 tp->snd_cwnd);
182 tp->snd_ssthresh = tp->snd_cwnd;
183 }
184 }
185 }
186}
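Plugging numbers into the two detectors makes the thresholds concrete. With delay_min = 4000 us:

        ACK train: base_owd = max(4000 / 2, 125) = 2000 us, so slow start ends
                   once a train of closely spaced ACKs stretches 2 ms past round_start;
        delay:     thresh = max(4000 + 4000 / 8, 125) = 4500 us, so it also ends
                   once eight samples put the round's minimum RTT 12.5 % over delay_min.

The 125 us floors keep both detectors usable on very low-latency paths.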
187
188static s32 tcp_cdg_grad(struct cdg *ca)
189{
190 s32 gmin = ca->rtt.min - ca->rtt_prev.min;
191 s32 gmax = ca->rtt.max - ca->rtt_prev.max;
192 s32 grad;
193
194 if (ca->gradients) {
195 ca->gsum.min += gmin - ca->gradients[ca->tail].min;
196 ca->gsum.max += gmax - ca->gradients[ca->tail].max;
197 ca->gradients[ca->tail].min = gmin;
198 ca->gradients[ca->tail].max = gmax;
199 ca->tail = (ca->tail + 1) & (window - 1);
200 gmin = ca->gsum.min;
201 gmax = ca->gsum.max;
202 }
203
204 /* We keep sums to ignore gradients during cwnd reductions;
205 * the paper's smoothed gradients otherwise simplify to:
206 * (rtt_latest - rtt_oldest) / window.
207 *
208 * We also drop division by window here.
209 */
210 grad = gmin > 0 ? gmin : gmax;
211
212 /* Extrapolate missing values in gradient window: */
213 if (!ca->gfilled) {
214 if (!ca->gradients && window > 1)
215 grad *= window; /* Memory allocation failed. */
216 else if (ca->tail == 0)
217 ca->gfilled = true;
218 else
219 grad = (grad * window) / (int)ca->tail;
220 }
221
222 /* Backoff was effectual: */
223 if (gmin <= -32 || gmax <= -32)
224 ca->backoff_cnt = 0;
225
226 if (use_tolerance) {
227 /* Reduce small variations to zero: */
228 gmin = DIV_ROUND_CLOSEST(gmin, 64);
229 gmax = DIV_ROUND_CLOSEST(gmax, 64);
230
231 if (gmin > 0 && gmax <= 0)
232 ca->state = CDG_FULL;
233 else if ((gmin > 0 && gmax > 0) || gmax < 0)
234 ca->state = CDG_NONFULL;
235 }
236 return grad;
237}
238
239static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
240{
241 struct cdg *ca = inet_csk_ca(sk);
242 struct tcp_sock *tp = tcp_sk(sk);
243
244 if (prandom_u32() <= nexp_u32(grad * backoff_factor))
245 return false;
246
247 if (use_ineff) {
248 ca->backoff_cnt++;
249 if (ca->backoff_cnt > use_ineff)
250 return false;
251 }
252
253 ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
254 ca->state = CDG_BACKOFF;
255 tcp_enter_cwr(sk);
256 return true;
257}
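Since nexp_u32(x) approximates exp(-x * 1e-6) * U32_MAX, the prandom_u32() comparison above backs off with probability 1 - exp(-grad * backoff_factor / 1e6), grad being the summed gradient in microseconds. With the default backoff_factor = 42:

        grad =   100 us: p = 1 - exp(-0.0042) ~= 0.4 %
        grad =  1000 us: p = 1 - exp(-0.042)  ~= 4.1 %
        grad = 10000 us: p = 1 - exp(-0.42)   ~= 34 %

so small delay gradients rarely trigger a backoff while sustained queue growth quickly does, and the use_ineff counter gives up after five consecutive backoffs that never show up as a falling gradient.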
258
259/* Not called in CWR or Recovery state. */
260static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
261{
262 struct cdg *ca = inet_csk_ca(sk);
263 struct tcp_sock *tp = tcp_sk(sk);
264 u32 prior_snd_cwnd;
265 u32 incr;
266
267 if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect)
268 tcp_cdg_hystart_update(sk);
269
270 if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
271 s32 grad = 0;
272
273 if (ca->rtt_prev.v64)
274 grad = tcp_cdg_grad(ca);
275 ca->rtt_seq = tp->snd_nxt;
276 ca->rtt_prev = ca->rtt;
277 ca->rtt.v64 = 0;
278 ca->last_ack = 0;
279 ca->sample_cnt = 0;
280
281 if (grad > 0 && tcp_cdg_backoff(sk, grad))
282 return;
283 }
284
285 if (!tcp_is_cwnd_limited(sk)) {
286 ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
287 return;
288 }
289
290 prior_snd_cwnd = tp->snd_cwnd;
291 tcp_reno_cong_avoid(sk, ack, acked);
292
293 incr = tp->snd_cwnd - prior_snd_cwnd;
294 ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
295}
296
297static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
298{
299 struct cdg *ca = inet_csk_ca(sk);
300 struct tcp_sock *tp = tcp_sk(sk);
301
302 if (rtt_us <= 0)
303 return;
304
305 /* A heuristic for filtering delayed ACKs, adapted from:
306 * D.A. Hayes. "Timing enhancements to the FreeBSD kernel to support
307 * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010.
308 */
309 if (tp->sacked_out == 0) {
310 if (num_acked == 1 && ca->delack) {
311 /* A delayed ACK is only used for the minimum if it is
 312 * provably lower than an existing non-zero minimum.
313 */
314 ca->rtt.min = min(ca->rtt.min, rtt_us);
315 ca->delack--;
316 return;
317 } else if (num_acked > 1 && ca->delack < 5) {
318 ca->delack++;
319 }
320 }
321
322 ca->rtt.min = min_not_zero(ca->rtt.min, rtt_us);
323 ca->rtt.max = max(ca->rtt.max, rtt_us);
324}
325
326static u32 tcp_cdg_ssthresh(struct sock *sk)
327{
328 struct cdg *ca = inet_csk_ca(sk);
329 struct tcp_sock *tp = tcp_sk(sk);
330
331 ca->undo_cwnd = tp->snd_cwnd;
332
333 if (ca->state == CDG_BACKOFF)
334 return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
335
336 if (ca->state == CDG_NONFULL && use_tolerance)
337 return tp->snd_cwnd;
338
339 ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
340 if (use_shadow)
341 return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
342 return max(2U, tp->snd_cwnd >> 1);
343}
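The Q10 fixed-point beta deserves a worked line. The default backoff_beta = 0.7071 * 1024 truncates to 724, so after a delay-gradient backoff with snd_cwnd = 100:

        ssthresh = max(2, (100 * min(1024, 724)) >> 10) = max(2, 72400 / 1024) = 70

a multiplicative decrease of ~0.707 (sqrt 0.5, per the parameter comment), gentler than the halving the shadow-window path below applies on actual loss.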
344
345static u32 tcp_cdg_undo_cwnd(struct sock *sk)
346{
347 struct cdg *ca = inet_csk_ca(sk);
348
349 return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd);
350}
351
352static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
353{
354 struct cdg *ca = inet_csk_ca(sk);
355 struct tcp_sock *tp = tcp_sk(sk);
356 struct minmax *gradients;
357
358 switch (ev) {
359 case CA_EVENT_CWND_RESTART:
360 gradients = ca->gradients;
361 if (gradients)
362 memset(gradients, 0, window * sizeof(gradients[0]));
363 memset(ca, 0, sizeof(*ca));
364
365 ca->gradients = gradients;
366 ca->rtt_seq = tp->snd_nxt;
367 ca->shadow_wnd = tp->snd_cwnd;
368 break;
369 case CA_EVENT_COMPLETE_CWR:
370 ca->state = CDG_UNKNOWN;
371 ca->rtt_seq = tp->snd_nxt;
372 ca->rtt_prev = ca->rtt;
373 ca->rtt.v64 = 0;
374 break;
375 default:
376 break;
377 }
378}
379
380static void tcp_cdg_init(struct sock *sk)
381{
382 struct cdg *ca = inet_csk_ca(sk);
383 struct tcp_sock *tp = tcp_sk(sk);
384
385 /* We silently fall back to window = 1 if allocation fails. */
386 if (window > 1)
387 ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
388 GFP_NOWAIT | __GFP_NOWARN);
389 ca->rtt_seq = tp->snd_nxt;
390 ca->shadow_wnd = tp->snd_cwnd;
391}
392
393static void tcp_cdg_release(struct sock *sk)
394{
395 struct cdg *ca = inet_csk_ca(sk);
396
397 kfree(ca->gradients);
398}
399
400struct tcp_congestion_ops tcp_cdg __read_mostly = {
401 .cong_avoid = tcp_cdg_cong_avoid,
402 .cwnd_event = tcp_cdg_cwnd_event,
403 .pkts_acked = tcp_cdg_acked,
404 .undo_cwnd = tcp_cdg_undo_cwnd,
405 .ssthresh = tcp_cdg_ssthresh,
406 .release = tcp_cdg_release,
407 .init = tcp_cdg_init,
408 .owner = THIS_MODULE,
409 .name = "cdg",
410};
411
412static int __init tcp_cdg_register(void)
413{
414 if (backoff_beta > 1024 || window < 1 || window > 256)
415 return -ERANGE;
416 if (!is_power_of_2(window))
417 return -EINVAL;
418
419 BUILD_BUG_ON(sizeof(struct cdg) > ICSK_CA_PRIV_SIZE);
420 tcp_register_congestion_control(&tcp_cdg);
421 return 0;
422}
423
424static void __exit tcp_cdg_unregister(void)
425{
426 tcp_unregister_congestion_control(&tcp_cdg);
427}
428
429module_init(tcp_cdg_register);
430module_exit(tcp_cdg_unregister);
431MODULE_AUTHOR("Kenneth Klette Jonassen");
432MODULE_LICENSE("GPL");
433MODULE_DESCRIPTION("TCP CDG");
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 4c41c1287197..7092a61c4dc8 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -204,20 +204,26 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
204 204
205 /* Expired RTT */ 205 /* Expired RTT */
206 if (!before(tp->snd_una, ca->next_seq)) { 206 if (!before(tp->snd_una, ca->next_seq)) {
207 /* For avoiding denominator == 1. */ 207 u64 bytes_ecn = ca->acked_bytes_ecn;
208 if (ca->acked_bytes_total == 0) 208 u32 alpha = ca->dctcp_alpha;
209 ca->acked_bytes_total = 1;
210 209
211 /* alpha = (1 - g) * alpha + g * F */ 210 /* alpha = (1 - g) * alpha + g * F */
212 ca->dctcp_alpha = ca->dctcp_alpha -
213 (ca->dctcp_alpha >> dctcp_shift_g) +
214 (ca->acked_bytes_ecn << (10U - dctcp_shift_g)) /
215 ca->acked_bytes_total;
216 211
217 if (ca->dctcp_alpha > DCTCP_MAX_ALPHA) 212 alpha -= alpha >> dctcp_shift_g;
218 /* Clamp dctcp_alpha to max. */ 213 if (bytes_ecn) {
219 ca->dctcp_alpha = DCTCP_MAX_ALPHA; 214 /* If dctcp_shift_g == 1, a 32bit value would overflow
215 * after 8 Mbytes.
216 */
217 bytes_ecn <<= (10 - dctcp_shift_g);
218 do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
220 219
220 alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
221 }
222 /* dctcp_alpha can be read from dctcp_get_info() without
223 * synchro, so we ask compiler to not use dctcp_alpha
224 * as a temporary variable in prior operations.
225 */
226 WRITE_ONCE(ca->dctcp_alpha, alpha);
221 dctcp_reset(tp, ca); 227 dctcp_reset(tp, ca);
222 } 228 }
223} 229}
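The rewritten block is still the DCTCP EWMA alpha = (1 - g) * alpha + g * F, with g = 2^-dctcp_shift_g and F the fraction of ECN-marked bytes, just computed in 64 bits so that dctcp_shift_g = 1 cannot overflow. A user-space sketch of the same fixed-point step (defaults mirrored from the kernel; do_div becomes plain division; verification code, not kernel code):

#include <stdint.h>
#include <stdio.h>

#define DCTCP_MAX_ALPHA 1024U

/* alpha in Q10 fixed point: one update per observation window */
static uint32_t dctcp_alpha_step(uint32_t alpha, uint64_t bytes_ecn,
                                 uint32_t bytes_total, unsigned int shift_g)
{
        alpha -= alpha >> shift_g;
        if (bytes_ecn) {
                bytes_ecn <<= 10 - shift_g;     /* 64-bit, as in the patch */
                bytes_ecn /= bytes_total ? bytes_total : 1;
                alpha += (uint32_t)bytes_ecn;
                if (alpha > DCTCP_MAX_ALPHA)
                        alpha = DCTCP_MAX_ALPHA;
        }
        return alpha;
}

int main(void)
{
        uint32_t alpha = 0;
        int i;

        /* 25 % of bytes ECN-marked each window: alpha converges to 256/1024 */
        for (i = 0; i < 64; i++)
                alpha = dctcp_alpha_step(alpha, 25000, 100000, 4);
        printf("alpha = %u/1024\n", alpha);
        return 0;
}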
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 79b34a0f4a4a..479f34946177 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -19,13 +19,14 @@
19static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, 19static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
20 void *_info) 20 void *_info)
21{ 21{
22 const struct tcp_sock *tp = tcp_sk(sk);
23 struct tcp_info *info = _info; 22 struct tcp_info *info = _info;
24 23
25 if (sk->sk_state == TCP_LISTEN) { 24 if (sk->sk_state == TCP_LISTEN) {
26 r->idiag_rqueue = sk->sk_ack_backlog; 25 r->idiag_rqueue = sk->sk_ack_backlog;
27 r->idiag_wqueue = sk->sk_max_ack_backlog; 26 r->idiag_wqueue = sk->sk_max_ack_backlog;
28 } else { 27 } else if (sk->sk_type == SOCK_STREAM) {
28 const struct tcp_sock *tp = tcp_sk(sk);
29
29 r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 30 r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
30 r->idiag_wqueue = tp->write_seq - tp->snd_una; 31 r->idiag_wqueue = tp->write_seq - tp->snd_una;
31 } 32 }
@@ -50,6 +51,7 @@ static const struct inet_diag_handler tcp_diag_handler = {
50 .dump_one = tcp_diag_dump_one, 51 .dump_one = tcp_diag_dump_one,
51 .idiag_get_info = tcp_diag_get_info, 52 .idiag_get_info = tcp_diag_get_info,
52 .idiag_type = IPPROTO_TCP, 53 .idiag_type = IPPROTO_TCP,
54 .idiag_info_size = sizeof(struct tcp_info),
53}; 55};
54 56
55static int __init tcp_diag_init(void) 57static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 46b087a27503..f9c0fb84e435 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -78,8 +78,6 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
78 struct tcp_fastopen_context *ctx; 78 struct tcp_fastopen_context *ctx;
79 bool ok = false; 79 bool ok = false;
80 80
81 tcp_fastopen_init_key_once(true);
82
83 rcu_read_lock(); 81 rcu_read_lock();
84 ctx = rcu_dereference(tcp_fastopen_ctx); 82 ctx = rcu_dereference(tcp_fastopen_ctx);
85 if (ctx) { 83 if (ctx) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c9ab964189a0..684f095d196e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
359 /* Check #1 */ 359 /* Check #1 */
360 if (tp->rcv_ssthresh < tp->window_clamp && 360 if (tp->rcv_ssthresh < tp->window_clamp &&
361 (int)tp->rcv_ssthresh < tcp_space(sk) && 361 (int)tp->rcv_ssthresh < tcp_space(sk) &&
362 !sk_under_memory_pressure(sk)) { 362 !tcp_under_memory_pressure(sk)) {
363 int incr; 363 int incr;
364 364
365 /* Check #2. Increase window, if skb with such overhead 365 /* Check #2. Increase window, if skb with such overhead
@@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk)
446 446
447 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 447 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
448 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 448 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
449 !sk_under_memory_pressure(sk) && 449 !tcp_under_memory_pressure(sk) &&
450 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { 450 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
451 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 451 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
452 sysctl_tcp_rmem[2]); 452 sysctl_tcp_rmem[2]);
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1130struct tcp_sacktag_state { 1130struct tcp_sacktag_state {
1131 int reord; 1131 int reord;
1132 int fack_count; 1132 int fack_count;
1133 long rtt_us; /* RTT measured by SACKing never-retransmitted data */ 1133 /* Timestamps for earliest and latest never-retransmitted segment
1134 * that was SACKed. RTO needs the earliest RTT to stay conservative,
1135 * but congestion control should still get an accurate delay signal.
1136 */
1137 struct skb_mstamp first_sackt;
1138 struct skb_mstamp last_sackt;
1134 int flag; 1139 int flag;
1135}; 1140};
1136 1141
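A short timeline shows why the single rtt_us field became two timestamps. Suppose segments S1, S2, S3 leave at t = 0, 1, 2 ms and one ACK SACKs all three at t = 10 ms: tcp_clean_rtx_queue() below derives sack_rtt_us = 10 ms from first_sackt (S1, the most conservative sample, which is what the RTO estimator wants) and ca_rtt_us = 8 ms from last_sackt (S3, the freshest sample, which is what ca_ops->pkts_acked() now receives). The old code measured only the earliest sample and handed that same pessimistic value to congestion control as well.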
@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
1233 state->reord); 1238 state->reord);
1234 if (!after(end_seq, tp->high_seq)) 1239 if (!after(end_seq, tp->high_seq))
1235 state->flag |= FLAG_ORIG_SACK_ACKED; 1240 state->flag |= FLAG_ORIG_SACK_ACKED;
1236 /* Pick the earliest sequence sacked for RTT */ 1241 if (state->first_sackt.v64 == 0)
1237 if (state->rtt_us < 0) { 1242 state->first_sackt = *xmit_time;
1238 struct skb_mstamp now; 1243 state->last_sackt = *xmit_time;
1239
1240 skb_mstamp_get(&now);
1241 state->rtt_us = skb_mstamp_us_delta(&now,
1242 xmit_time);
1243 }
1244 } 1244 }
1245 1245
1246 if (sacked & TCPCB_LOST) { 1246 if (sacked & TCPCB_LOST) {
@@ -1316,16 +1316,12 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1316 * code can come after this skb later on it's better to keep 1316 * code can come after this skb later on it's better to keep
1317 * setting gso_size to something. 1317 * setting gso_size to something.
1318 */ 1318 */
1319 if (!skb_shinfo(prev)->gso_size) { 1319 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1320 skb_shinfo(prev)->gso_size = mss; 1320 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1321 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1322 }
1323 1321
1324 /* CHECKME: To clear or not to clear? Mimics normal skb currently */ 1322 /* CHECKME: To clear or not to clear? Mimics normal skb currently */
1325 if (tcp_skb_pcount(skb) <= 1) { 1323 if (tcp_skb_pcount(skb) <= 1)
1326 skb_shinfo(skb)->gso_size = 0; 1324 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1327 skb_shinfo(skb)->gso_type = 0;
1328 }
1329 1325
1330 /* Difference in this won't matter, both ACKed by the same cumul. ACK */ 1326 /* Difference in this won't matter, both ACKed by the same cumul. ACK */
1331 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); 1327 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1634,7 +1630,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
1634 1630
1635static int 1631static int
1636tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1632tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1637 u32 prior_snd_una, long *sack_rtt_us) 1633 u32 prior_snd_una, struct tcp_sacktag_state *state)
1638{ 1634{
1639 struct tcp_sock *tp = tcp_sk(sk); 1635 struct tcp_sock *tp = tcp_sk(sk);
1640 const unsigned char *ptr = (skb_transport_header(ack_skb) + 1636 const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1638,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1642 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); 1638 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1643 struct tcp_sack_block sp[TCP_NUM_SACKS]; 1639 struct tcp_sack_block sp[TCP_NUM_SACKS];
1644 struct tcp_sack_block *cache; 1640 struct tcp_sack_block *cache;
1645 struct tcp_sacktag_state state;
1646 struct sk_buff *skb; 1641 struct sk_buff *skb;
1647 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); 1642 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1648 int used_sacks; 1643 int used_sacks;
@@ -1650,9 +1645,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1650 int i, j; 1645 int i, j;
1651 int first_sack_index; 1646 int first_sack_index;
1652 1647
1653 state.flag = 0; 1648 state->flag = 0;
1654 state.reord = tp->packets_out; 1649 state->reord = tp->packets_out;
1655 state.rtt_us = -1L;
1656 1650
1657 if (!tp->sacked_out) { 1651 if (!tp->sacked_out) {
1658 if (WARN_ON(tp->fackets_out)) 1652 if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1657,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1663 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, 1657 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1664 num_sacks, prior_snd_una); 1658 num_sacks, prior_snd_una);
1665 if (found_dup_sack) 1659 if (found_dup_sack)
1666 state.flag |= FLAG_DSACKING_ACK; 1660 state->flag |= FLAG_DSACKING_ACK;
1667 1661
1668 /* Eliminate too old ACKs, but take into 1662 /* Eliminate too old ACKs, but take into
1669 * account more or less fresh ones, they can 1663 * account more or less fresh ones, they can
@@ -1728,7 +1722,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1728 } 1722 }
1729 1723
1730 skb = tcp_write_queue_head(sk); 1724 skb = tcp_write_queue_head(sk);
1731 state.fack_count = 0; 1725 state->fack_count = 0;
1732 i = 0; 1726 i = 0;
1733 1727
1734 if (!tp->sacked_out) { 1728 if (!tp->sacked_out) {
@@ -1762,10 +1756,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1762 1756
1763 /* Head todo? */ 1757 /* Head todo? */
1764 if (before(start_seq, cache->start_seq)) { 1758 if (before(start_seq, cache->start_seq)) {
1765 skb = tcp_sacktag_skip(skb, sk, &state, 1759 skb = tcp_sacktag_skip(skb, sk, state,
1766 start_seq); 1760 start_seq);
1767 skb = tcp_sacktag_walk(skb, sk, next_dup, 1761 skb = tcp_sacktag_walk(skb, sk, next_dup,
1768 &state, 1762 state,
1769 start_seq, 1763 start_seq,
1770 cache->start_seq, 1764 cache->start_seq,
1771 dup_sack); 1765 dup_sack);
@@ -1776,7 +1770,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1776 goto advance_sp; 1770 goto advance_sp;
1777 1771
1778 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, 1772 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1779 &state, 1773 state,
1780 cache->end_seq); 1774 cache->end_seq);
1781 1775
1782 /* ...tail remains todo... */ 1776 /* ...tail remains todo... */
@@ -1785,12 +1779,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1785 skb = tcp_highest_sack(sk); 1779 skb = tcp_highest_sack(sk);
1786 if (!skb) 1780 if (!skb)
1787 break; 1781 break;
1788 state.fack_count = tp->fackets_out; 1782 state->fack_count = tp->fackets_out;
1789 cache++; 1783 cache++;
1790 goto walk; 1784 goto walk;
1791 } 1785 }
1792 1786
1793 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); 1787 skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
1794 /* Check overlap against next cached too (past this one already) */ 1788 /* Check overlap against next cached too (past this one already) */
1795 cache++; 1789 cache++;
1796 continue; 1790 continue;
@@ -1800,12 +1794,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1800 skb = tcp_highest_sack(sk); 1794 skb = tcp_highest_sack(sk);
1801 if (!skb) 1795 if (!skb)
1802 break; 1796 break;
1803 state.fack_count = tp->fackets_out; 1797 state->fack_count = tp->fackets_out;
1804 } 1798 }
1805 skb = tcp_sacktag_skip(skb, sk, &state, start_seq); 1799 skb = tcp_sacktag_skip(skb, sk, state, start_seq);
1806 1800
1807walk: 1801walk:
1808 skb = tcp_sacktag_walk(skb, sk, next_dup, &state, 1802 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
1809 start_seq, end_seq, dup_sack); 1803 start_seq, end_seq, dup_sack);
1810 1804
1811advance_sp: 1805advance_sp:
@@ -1820,9 +1814,9 @@ advance_sp:
1820 for (j = 0; j < used_sacks; j++) 1814 for (j = 0; j < used_sacks; j++)
1821 tp->recv_sack_cache[i++] = sp[j]; 1815 tp->recv_sack_cache[i++] = sp[j];
1822 1816
1823 if ((state.reord < tp->fackets_out) && 1817 if ((state->reord < tp->fackets_out) &&
1824 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1818 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1825 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); 1819 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1826 1820
1827 tcp_mark_lost_retrans(sk); 1821 tcp_mark_lost_retrans(sk);
1828 tcp_verify_left_out(tp); 1822 tcp_verify_left_out(tp);
@@ -1834,8 +1828,7 @@ out:
1834 WARN_ON((int)tp->retrans_out < 0); 1828 WARN_ON((int)tp->retrans_out < 0);
1835 WARN_ON((int)tcp_packets_in_flight(tp) < 0); 1829 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1836#endif 1830#endif
1837 *sack_rtt_us = state.rtt_us; 1831 return state->flag;
1838 return state.flag;
1839} 1832}
1840 1833
1841/* Limits sacked_out so that sum with lost_out isn't ever larger than 1834/* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -2255,7 +2248,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2255 (oldcnt >= packets)) 2248 (oldcnt >= packets))
2256 break; 2249 break;
2257 2250
2258 mss = skb_shinfo(skb)->gso_size; 2251 mss = tcp_skb_mss(skb);
2259 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, 2252 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
2260 mss, GFP_ATOMIC); 2253 mss, GFP_ATOMIC);
2261 if (err < 0) 2254 if (err < 0)
@@ -2555,6 +2548,7 @@ void tcp_enter_cwr(struct sock *sk)
2555 tcp_set_ca_state(sk, TCP_CA_CWR); 2548 tcp_set_ca_state(sk, TCP_CA_CWR);
2556 } 2549 }
2557} 2550}
2551EXPORT_SYMBOL(tcp_enter_cwr);
2558 2552
2559static void tcp_try_keep_open(struct sock *sk) 2553static void tcp_try_keep_open(struct sock *sk)
2560{ 2554{
@@ -3055,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3055 * arrived at the other end. 3049 * arrived at the other end.
3056 */ 3050 */
3057static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, 3051static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3058 u32 prior_snd_una, long sack_rtt_us) 3052 u32 prior_snd_una,
3053 struct tcp_sacktag_state *sack)
3059{ 3054{
3060 const struct inet_connection_sock *icsk = inet_csk(sk); 3055 const struct inet_connection_sock *icsk = inet_csk(sk);
3061 struct skb_mstamp first_ackt, last_ackt, now; 3056 struct skb_mstamp first_ackt, last_ackt, now;
@@ -3063,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3063 u32 prior_sacked = tp->sacked_out; 3058 u32 prior_sacked = tp->sacked_out;
3064 u32 reord = tp->packets_out; 3059 u32 reord = tp->packets_out;
3065 bool fully_acked = true; 3060 bool fully_acked = true;
3066 long ca_seq_rtt_us = -1L; 3061 long sack_rtt_us = -1L;
3067 long seq_rtt_us = -1L; 3062 long seq_rtt_us = -1L;
3063 long ca_rtt_us = -1L;
3068 struct sk_buff *skb; 3064 struct sk_buff *skb;
3069 u32 pkts_acked = 0; 3065 u32 pkts_acked = 0;
3070 bool rtt_update; 3066 bool rtt_update;
@@ -3153,15 +3149,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3153 skb_mstamp_get(&now); 3149 skb_mstamp_get(&now);
3154 if (likely(first_ackt.v64)) { 3150 if (likely(first_ackt.v64)) {
3155 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); 3151 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
3156 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); 3152 ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
3153 }
3154 if (sack->first_sackt.v64) {
3155 sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
3156 ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
3157 } 3157 }
3158 3158
3159 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); 3159 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
3160 3160
3161 if (flag & FLAG_ACKED) { 3161 if (flag & FLAG_ACKED) {
3162 const struct tcp_congestion_ops *ca_ops
3163 = inet_csk(sk)->icsk_ca_ops;
3164
3165 tcp_rearm_rto(sk); 3162 tcp_rearm_rto(sk);
3166 if (unlikely(icsk->icsk_mtup.probe_size && 3163 if (unlikely(icsk->icsk_mtup.probe_size &&
3167 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { 3164 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3184,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3184 3181
3185 tp->fackets_out -= min(pkts_acked, tp->fackets_out); 3182 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3186 3183
3187 if (ca_ops->pkts_acked) {
3188 long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
3189 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3190 }
3191
3192 } else if (skb && rtt_update && sack_rtt_us >= 0 && 3184 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3193 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { 3185 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
3194 /* Do not re-arm RTO if the sack RTT is measured from data sent 3186 /* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3198,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3198 tcp_rearm_rto(sk); 3190 tcp_rearm_rto(sk);
3199 } 3191 }
3200 3192
3193 if (icsk->icsk_ca_ops->pkts_acked)
3194 icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
3195
3201#if FASTRETRANS_DEBUG > 0 3196#if FASTRETRANS_DEBUG > 0
3202 WARN_ON((int)tp->sacked_out < 0); 3197 WARN_ON((int)tp->sacked_out < 0);
3203 WARN_ON((int)tp->lost_out < 0); 3198 WARN_ON((int)tp->lost_out < 0);
@@ -3238,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk)
3238 * This function is not for random using! 3233 * This function is not for random using!
3239 */ 3234 */
3240 } else { 3235 } else {
3241 unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 3236 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3242 3237
3243 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 3238 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3244 when, TCP_RTO_MAX); 3239 when, TCP_RTO_MAX);
@@ -3466,6 +3461,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3466{ 3461{
3467 struct inet_connection_sock *icsk = inet_csk(sk); 3462 struct inet_connection_sock *icsk = inet_csk(sk);
3468 struct tcp_sock *tp = tcp_sk(sk); 3463 struct tcp_sock *tp = tcp_sk(sk);
3464 struct tcp_sacktag_state sack_state;
3469 u32 prior_snd_una = tp->snd_una; 3465 u32 prior_snd_una = tp->snd_una;
3470 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3466 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3471 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3467 u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3474,7 +3470,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3474 int prior_packets = tp->packets_out; 3470 int prior_packets = tp->packets_out;
3475 const int prior_unsacked = tp->packets_out - tp->sacked_out; 3471 const int prior_unsacked = tp->packets_out - tp->sacked_out;
3476 int acked = 0; /* Number of packets newly acked */ 3472 int acked = 0; /* Number of packets newly acked */
3477 long sack_rtt_us = -1L; 3473
3474 sack_state.first_sackt.v64 = 0;
3478 3475
3479 /* We very likely will need to access write queue head. */ 3476 /* We very likely will need to access write queue head. */
3480 prefetchw(sk->sk_write_queue.next); 3477 prefetchw(sk->sk_write_queue.next);
@@ -3538,7 +3535,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3538 3535
3539 if (TCP_SKB_CB(skb)->sacked) 3536 if (TCP_SKB_CB(skb)->sacked)
3540 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, 3537 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3541 &sack_rtt_us); 3538 &sack_state);
3542 3539
3543 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { 3540 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3544 flag |= FLAG_ECE; 3541 flag |= FLAG_ECE;
@@ -3563,7 +3560,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3563 /* See if we can take anything off of the retransmit queue. */ 3560 /* See if we can take anything off of the retransmit queue. */
3564 acked = tp->packets_out; 3561 acked = tp->packets_out;
3565 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, 3562 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3566 sack_rtt_us); 3563 &sack_state);
3567 acked -= tp->packets_out; 3564 acked -= tp->packets_out;
3568 3565
3569 /* Advance cwnd if state allows */ 3566 /* Advance cwnd if state allows */
@@ -3615,7 +3612,7 @@ old_ack:
3615 */ 3612 */
3616 if (TCP_SKB_CB(skb)->sacked) { 3613 if (TCP_SKB_CB(skb)->sacked) {
3617 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, 3614 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3618 &sack_rtt_us); 3615 &sack_state);
3619 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3616 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3620 is_dupack, flag); 3617 is_dupack, flag);
3621 } 3618 }
@@ -4514,10 +4511,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4514 4511
4515 if (eaten <= 0) { 4512 if (eaten <= 0) {
4516queue_and_out: 4513queue_and_out:
4517 if (eaten < 0 && 4514 if (eaten < 0) {
4518 tcp_try_rmem_schedule(sk, skb, skb->truesize)) 4515 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4519 goto drop; 4516 sk_forced_mem_schedule(sk, skb->truesize);
4520 4517 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4518 goto drop;
4519 }
4521 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); 4520 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4522 } 4521 }
4523 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 4522 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
@@ -4788,7 +4787,7 @@ static int tcp_prune_queue(struct sock *sk)
4788 4787
4789 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 4788 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4790 tcp_clamp_window(sk); 4789 tcp_clamp_window(sk);
4791 else if (sk_under_memory_pressure(sk)) 4790 else if (tcp_under_memory_pressure(sk))
4792 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); 4791 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4793 4792
4794 tcp_collapse_ofo_queue(sk); 4793 tcp_collapse_ofo_queue(sk);
@@ -4832,7 +4831,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
4832 return false; 4831 return false;
4833 4832
4834 /* If we are under global TCP memory pressure, do not expand. */ 4833 /* If we are under global TCP memory pressure, do not expand. */
4835 if (sk_under_memory_pressure(sk)) 4834 if (tcp_under_memory_pressure(sk))
4836 return false; 4835 return false;
4837 4836
4838 /* If we are under soft global TCP memory pressure, do not expand. */ 4837 /* If we are under soft global TCP memory pressure, do not expand. */
@@ -6067,6 +6066,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
6067 return want_cookie; 6066 return want_cookie;
6068} 6067}
6069 6068
6069static void tcp_reqsk_record_syn(const struct sock *sk,
6070 struct request_sock *req,
6071 const struct sk_buff *skb)
6072{
6073 if (tcp_sk(sk)->save_syn) {
6074 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6075 u32 *copy;
6076
6077 copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
6078 if (copy) {
6079 copy[0] = len;
6080 memcpy(&copy[1], skb_network_header(skb), len);
6081 req->saved_syn = copy;
6082 }
6083 }
6084}
6085
6070int tcp_conn_request(struct request_sock_ops *rsk_ops, 6086int tcp_conn_request(struct request_sock_ops *rsk_ops,
6071 const struct tcp_request_sock_ops *af_ops, 6087 const struct tcp_request_sock_ops *af_ops,
6072 struct sock *sk, struct sk_buff *skb) 6088 struct sock *sk, struct sk_buff *skb)
@@ -6199,6 +6215,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6199 tcp_rsk(req)->tfo_listener = false; 6215 tcp_rsk(req)->tfo_listener = false;
6200 af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 6216 af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
6201 } 6217 }
6218 tcp_reqsk_record_syn(sk, req, skb);
6202 6219
6203 return 0; 6220 return 0;
6204 6221
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fc1c658ec6c1..d7d4c2b79cf2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1400,7 +1400,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1400 return 0; 1400 return 0;
1401 } 1401 }
1402 1402
1403 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 1403 if (tcp_checksum_complete(skb))
1404 goto csum_err; 1404 goto csum_err;
1405 1405
1406 if (sk->sk_state == TCP_LISTEN) { 1406 if (sk->sk_state == TCP_LISTEN) {
@@ -1626,6 +1626,7 @@ process:
1626 skb->dev = NULL; 1626 skb->dev = NULL;
1627 1627
1628 bh_lock_sock_nested(sk); 1628 bh_lock_sock_nested(sk);
1629 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
1629 ret = 0; 1630 ret = 0;
1630 if (!sock_owned_by_user(sk)) { 1631 if (!sock_owned_by_user(sk)) {
1631 if (!tcp_prequeue(sk, skb)) 1632 if (!tcp_prequeue(sk, skb))
@@ -1646,7 +1647,7 @@ no_tcp_socket:
1646 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1647 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1647 goto discard_it; 1648 goto discard_it;
1648 1649
1649 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 1650 if (tcp_checksum_complete(skb)) {
1650csum_error: 1651csum_error:
1651 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 1652 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1652bad_packet: 1653bad_packet:
@@ -1670,10 +1671,6 @@ do_time_wait:
1670 goto discard_it; 1671 goto discard_it;
1671 } 1672 }
1672 1673
1673 if (skb->len < (th->doff << 2)) {
1674 inet_twsk_put(inet_twsk(sk));
1675 goto bad_packet;
1676 }
1677 if (tcp_checksum_complete(skb)) { 1674 if (tcp_checksum_complete(skb)) {
1678 inet_twsk_put(inet_twsk(sk)); 1675 inet_twsk_put(inet_twsk(sk));
1679 goto csum_error; 1676 goto csum_error;
@@ -1802,6 +1799,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
1802 1799
1803 /* If socket is aborted during connect operation */ 1800 /* If socket is aborted during connect operation */
1804 tcp_free_fastopen_req(tp); 1801 tcp_free_fastopen_req(tp);
1802 tcp_saved_syn_free(tp);
1805 1803
1806 sk_sockets_allocated_dec(sk); 1804 sk_sockets_allocated_dec(sk);
1807 sock_release_memcg(sk); 1805 sock_release_memcg(sk);
@@ -2410,12 +2408,15 @@ static int __net_init tcp_sk_init(struct net *net)
2410 goto fail; 2408 goto fail;
2411 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; 2409 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2412 } 2410 }
2411
2413 net->ipv4.sysctl_tcp_ecn = 2; 2412 net->ipv4.sysctl_tcp_ecn = 2;
2413 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2414
2414 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; 2415 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2415 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 2416 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2416 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 2417 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2417 return 0;
2418 2418
2419 return 0;
2419fail: 2420fail:
2420 tcp_sk_exit(net); 2421 tcp_sk_exit(net);
2421 2422
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 17e7339ee5ca..4bc00cb79e60 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -451,6 +451,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
451 451
452 newtp->rcv_wup = newtp->copied_seq = 452 newtp->rcv_wup = newtp->copied_seq =
453 newtp->rcv_nxt = treq->rcv_isn + 1; 453 newtp->rcv_nxt = treq->rcv_isn + 1;
454 newtp->segs_in = 0;
454 455
455 newtp->snd_sml = newtp->snd_una = 456 newtp->snd_sml = newtp->snd_una =
456 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; 457 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -539,6 +540,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
539 newtp->fastopen_rsk = NULL; 540 newtp->fastopen_rsk = NULL;
540 newtp->syn_data_acked = 0; 541 newtp->syn_data_acked = 0;
541 542
543 newtp->saved_syn = req->saved_syn;
544 req->saved_syn = NULL;
545
542 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); 546 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
543 } 547 }
544 return newsk; 548 return newsk;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 3f7c2fca5431..9864a2dbadce 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -77,7 +77,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
77 oldlen = (u16)~skb->len; 77 oldlen = (u16)~skb->len;
78 __skb_pull(skb, thlen); 78 __skb_pull(skb, thlen);
79 79
80 mss = tcp_skb_mss(skb); 80 mss = skb_shinfo(skb)->gso_size;
81 if (unlikely(skb->len <= mss)) 81 if (unlikely(skb->len <= mss))
82 goto out; 82 goto out;
83 83
@@ -242,7 +242,7 @@ found:
242 flush |= *(u32 *)((u8 *)th + i) ^ 242 flush |= *(u32 *)((u8 *)th + i) ^
243 *(u32 *)((u8 *)th2 + i); 243 *(u32 *)((u8 *)th2 + i);
244 244
245 mss = tcp_skb_mss(p); 245 mss = skb_shinfo(p)->gso_size;
246 246
247 flush |= (len - 1) >= mss; 247 flush |= (len - 1) >= mss;
248 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); 248 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a369e8a70b2c..b1c218df2c85 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -50,8 +50,8 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1;
50 */ 50 */
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0; 51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52 52
53/* Default TSQ limit of two TSO segments */ 53/* Default TSQ limit of four TSO segments */
54int sysctl_tcp_limit_output_bytes __read_mostly = 131072; 54int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
55 55
56/* This limits the percentage of the congestion window which we 56/* This limits the percentage of the congestion window which we
57 * will allow a single TSO frame to consume. Building TSO frames 57 * will allow a single TSO frame to consume. Building TSO frames
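The new default follows from the TSO frame size: a full-sized TSO frame carries up to 64 KB, so the old budget was 2 * 65536 = 131072 bytes and the new one is 4 * 65536 = 262144, i.e. at most four full frames queued per socket below the qdisc. Presumably this keeps fast NICs and aggregating links busy while preserving the local queue bound TSQ exists to enforce.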
@@ -350,6 +350,15 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
+static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+{
+	if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+		/* tp->ecn_flags are cleared at a later point in time when
+		 * SYN ACK is ultimately being received.
+		 */
+		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
+}
+
 static void
 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
 		    struct sock *sk)
@@ -393,8 +402,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
  */
 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 {
-	struct skb_shared_info *shinfo = skb_shinfo(skb);
-
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	skb->csum = 0;
 
@@ -402,8 +409,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	TCP_SKB_CB(skb)->sacked = 0;
 
 	tcp_skb_pcount_set(skb, 1);
-	shinfo->gso_size = 0;
-	shinfo->gso_type = 0;
 
 	TCP_SKB_CB(skb)->seq = seq;
 	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
@@ -994,6 +999,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
+	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
 	if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
 		tcp_ecn_send(sk, skb, tcp_header_size);
 
@@ -1018,8 +1024,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
 		      tcp_skb_pcount(skb));
 
-	/* OK, its time to fill skb_shinfo(skb)->gso_segs */
+	tp->segs_out += tcp_skb_pcount(skb);
+	/* OK, it's time to fill skb_shinfo(skb)->gso_{segs|size} */
 	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
+	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
 
 	/* Our usage of tstamp should remain private */
 	skb->tstamp.tv64 = 0;
@@ -1056,25 +1064,17 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 }
 
 /* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int mss_now)
+static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
-	struct skb_shared_info *shinfo = skb_shinfo(skb);
-
-	/* Make sure we own this skb before messing gso_size/gso_segs */
-	WARN_ON_ONCE(skb_cloned(skb));
-
 	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
 		tcp_skb_pcount_set(skb, 1);
-		shinfo->gso_size = 0;
-		shinfo->gso_type = 0;
+		TCP_SKB_CB(skb)->tcp_gso_size = 0;
 	} else {
 		tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
-		shinfo->gso_size = mss_now;
-		shinfo->gso_type = sk->sk_gso_type;
+		TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
 	}
 }
 
@@ -1163,7 +1163,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = sk_stream_alloc_skb(sk, nsize, gfp);
+	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
 
@@ -1206,8 +1206,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	old_factor = tcp_skb_pcount(skb);
 
 	/* Fix up tso_factor for both original and new SKB. */
-	tcp_set_skb_tso_segs(sk, skb, mss_now);
-	tcp_set_skb_tso_segs(sk, buff, mss_now);
+	tcp_set_skb_tso_segs(skb, mss_now);
+	tcp_set_skb_tso_segs(buff, mss_now);
 
 	/* If this packet has been sent out already, we must
 	 * adjust the various packet counters.
@@ -1287,7 +1287,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
 	/* Any change of skb->len requires recalculation of tso factor. */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
+		tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
 
 	return 0;
 }
@@ -1619,13 +1619,12 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
-			     unsigned int mss_now)
+static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
 
 	if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
-		tcp_set_skb_tso_segs(sk, skb, mss_now);
+		tcp_set_skb_tso_segs(skb, mss_now);
 		tso_segs = tcp_skb_pcount(skb);
 	}
 	return tso_segs;
@@ -1680,7 +1679,7 @@ static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
 	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int cwnd_quota;
 
-	tcp_init_tso_segs(sk, skb, cur_mss);
+	tcp_init_tso_segs(skb, cur_mss);
 
 	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
 		return 0;
@@ -1722,7 +1721,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now, gfp);
 
-	buff = sk_stream_alloc_skb(sk, 0, gfp);
+	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
 
@@ -1749,8 +1748,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	tcp_fragment_tstamp(skb, buff);
 
 	/* Fix up tso_factor for both original and new SKB. */
-	tcp_set_skb_tso_segs(sk, skb, mss_now);
-	tcp_set_skb_tso_segs(sk, buff, mss_now);
+	tcp_set_skb_tso_segs(skb, mss_now);
+	tcp_set_skb_tso_segs(buff, mss_now);
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
@@ -1941,7 +1940,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	}
 
 	/* We're allowed to probe. Build it now. */
-	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC);
+	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
 	sk->sk_wmem_queued += nskb->truesize;
@@ -1984,7 +1983,7 @@ static int tcp_mtu_probe(struct sock *sk)
 							 skb->len, 0);
 		} else {
 			__pskb_trim_head(skb, copy);
-			tcp_set_skb_tso_segs(sk, skb, mss_now);
+			tcp_set_skb_tso_segs(skb, mss_now);
 		}
 		TCP_SKB_CB(skb)->seq += copy;
 	}
@@ -1994,7 +1993,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		if (len >= probe_size)
 			break;
 	}
-	tcp_init_tso_segs(sk, nskb, nskb->len);
+	tcp_init_tso_segs(nskb, nskb->len);
 
 	/* We're ready to send. If this fails, the probe will
 	 * be resegmented into mss-sized pieces by tcp_write_xmit().
@@ -2056,7 +2055,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
-		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
+		tso_segs = tcp_init_tso_segs(skb, mss_now);
 		BUG_ON(!tso_segs);
 
 		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
@@ -2078,7 +2077,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
 
-		if (tso_segs == 1 || !max_segs) {
+		if (tso_segs == 1) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
 						     (tcp_skb_is_last(sk, skb) ?
 						      nonagle : TCP_NAGLE_PUSH))))
@@ -2091,7 +2090,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
+		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
 						    min_t(unsigned int,
 							  cwnd_quota,
@@ -2392,7 +2391,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 
-		if (sk_under_memory_pressure(sk))
+		if (tcp_under_memory_pressure(sk))
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
 					       4U * tp->advmss);
 
@@ -2610,11 +2609,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (unlikely(oldpcount > 1)) {
 			if (skb_unclone(skb, GFP_ATOMIC))
 				return -ENOMEM;
-			tcp_init_tso_segs(sk, skb, cur_mss);
+			tcp_init_tso_segs(skb, cur_mss);
 			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
 		}
 	}
 
+	/* RFC3168, section 6.1.1.1. ECN fallback */
+	if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
+		tcp_ecn_clear_syn(sk, skb);
+
 	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
 	/* Make a copy, if the first transmission SKB clone we made
@@ -2816,8 +2819,10 @@ begin_fwd:
  * connection tear down and (memory) recovery.
  * Otherwise tcp_send_fin() could be tempted to either delay FIN
  * or even be forced to close flow without any FIN.
+ * In general, we want to allow one skb per socket to avoid hangs
+ * with edge trigger epoll()
  */
-static void sk_forced_wmem_schedule(struct sock *sk, int size)
+void sk_forced_mem_schedule(struct sock *sk, int size)
 {
 	int amt, status;
 
@@ -2841,7 +2846,7 @@ void tcp_send_fin(struct sock *sk)
 	 * Note: in the latter case, FIN packet will be sent after a timeout,
 	 * as TCP stack thinks it has already been transmitted.
 	 */
-	if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+	if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
 coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
@@ -2864,7 +2869,7 @@ coalesce:
 		return;
 	}
 	skb_reserve(skb, MAX_TCP_HEADER);
-	sk_forced_wmem_schedule(sk, skb->truesize);
+	sk_forced_mem_schedule(sk, skb->truesize);
 	/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 	tcp_init_nondata_skb(skb, tp->write_seq,
 			     TCPHDR_ACK | TCPHDR_FIN);
@@ -3175,7 +3180,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	/* limit to order-0 allocations */
 	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
 
-	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
+	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
 	if (!syn_data)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
@@ -3241,7 +3246,7 @@ int tcp_connect(struct sock *sk)
 		return 0;
 	}
 
-	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
 	if (unlikely(!buff))
 		return -ENOBUFS;
 
@@ -3382,7 +3387,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack);
  * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
  * out-of-date with SND.UNA-1 to probe window.
  */
-static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -3400,6 +3405,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	skb_mstamp_get(&skb->skb_mstamp);
+	NET_INC_STATS_BH(sock_net(sk), mib);
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
@@ -3407,12 +3413,12 @@ void tcp_send_window_probe(struct sock *sk)
 {
 	if (sk->sk_state == TCP_ESTABLISHED) {
 		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
-		tcp_xmit_probe_skb(sk, 0);
+		tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
 	}
 }
 
 /* Initiate keepalive or window probe from timer. */
-int tcp_write_wakeup(struct sock *sk)
+int tcp_write_wakeup(struct sock *sk, int mib)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -3440,7 +3446,7 @@ int tcp_write_wakeup(struct sock *sk)
 		if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
 			return -1;
 	} else if (!tcp_skb_pcount(skb))
-		tcp_set_skb_tso_segs(sk, skb, mss);
+		tcp_set_skb_tso_segs(skb, mss);
 
 	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
 	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
@@ -3449,8 +3455,8 @@ int tcp_write_wakeup(struct sock *sk)
 		return err;
 	} else {
 		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
-			tcp_xmit_probe_skb(sk, 1);
-		return tcp_xmit_probe_skb(sk, 0);
+			tcp_xmit_probe_skb(sk, 1, mib);
+		return tcp_xmit_probe_skb(sk, 0, mib);
 	}
 }
 
@@ -3464,7 +3470,7 @@ void tcp_send_probe0(struct sock *sk)
 	unsigned long probe_max;
 	int err;
 
-	err = tcp_write_wakeup(sk);
+	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
 
 	if (tp->packets_out || !tcp_send_head(sk)) {
 		/* Cancel probe timer, if it is not required. */
@@ -3490,7 +3496,7 @@ void tcp_send_probe0(struct sock *sk)
 		probe_max = TCP_RESOURCE_PROBE_INTERVAL;
 	}
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-				  inet_csk_rto_backoff(icsk, probe_max),
+				  tcp_probe0_when(sk, probe_max),
 				  TCP_RTO_MAX);
 }
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8c65dc147d8b..5b752f58a900 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -247,7 +247,7 @@ void tcp_delack_timer_handler(struct sock *sk)
 	}
 
 out:
-	if (sk_under_memory_pressure(sk))
+	if (tcp_under_memory_pressure(sk))
 		sk_mem_reclaim(sk);
 }
 
@@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data)
 			tcp_write_err(sk);
 			goto out;
 		}
-		if (tcp_write_wakeup(sk) <= 0) {
+		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
 			icsk->icsk_probes_out++;
 			elapsed = keepalive_intvl_when(tp);
 		} else {
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index b763c39ae1d7..6116604bf6e8 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -170,6 +170,7 @@ static const struct inet_diag_handler udp_diag_handler = {
 	.dump_one	 = udp_diag_dump_one,
 	.idiag_get_info  = udp_diag_get_info,
 	.idiag_type	 = IPPROTO_UDP,
+	.idiag_info_size = 0,
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -190,6 +191,7 @@ static const struct inet_diag_handler udplite_diag_handler = {
 	.dump_one	 = udplite_diag_dump_one,
 	.idiag_get_info  = udp_diag_get_info,
 	.idiag_type	 = IPPROTO_UDPLITE,
+	.idiag_info_size = 0,
 };
 
 static int __init udp_diag_init(void)
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6bb98cc193c9..933ea903f7b8 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -15,12 +15,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 	struct socket *sock = NULL;
 	struct sockaddr_in udp_addr;
 
-	err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
+	err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
 	if (err < 0)
 		goto error;
 
-	sk_change_net(sock->sk, net);
-
 	udp_addr.sin_family = AF_INET;
 	udp_addr.sin_addr = cfg->local_ip;
 	udp_addr.sin_port = cfg->local_udp_port;
@@ -47,7 +45,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 error:
 	if (sock) {
 		kernel_sock_shutdown(sock, SHUT_RDWR);
-		sk_release_kernel(sock->sk);
+		sock_release(sock);
 	}
 	*sockp = NULL;
 	return err;
@@ -101,7 +99,7 @@ void udp_tunnel_sock_release(struct socket *sock)
 {
 	rcu_assign_sk_user_data(sock->sk, NULL);
 	kernel_sock_shutdown(sock, SHUT_RDWR);
-	sk_release_kernel(sock->sk);
+	sock_release(sock);
 }
 EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
 
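Both udp_tunnel.c hunks retire the two-step pattern (create a kernel socket, then re-home it into the target namespace with sk_change_net(), and later tear it down with sk_release_kernel()) in favour of handing the namespace to sock_create_kern() up front and releasing with plain sock_release(). The shape of the change as a generic construction pattern; illustrative types, not the kernel API:

#include <stdio.h>
#include <stdlib.h>

struct netns { const char *name; };
struct ksock { struct netns *net; };

/* New style: the owner is a constructor argument, so there is no window
 * in which the object lives in the wrong namespace. */
static struct ksock *ksock_create(struct netns *net)
{
	struct ksock *s = malloc(sizeof(*s));

	if (s)
		s->net = net;
	return s;
}

int main(void)
{
	struct netns ns = { "vrf0" };
	struct ksock *s = ksock_create(&ns);

	if (!s)
		return 1;
	printf("socket lives in %s from birth\n", s->net->name);
	free(s);	/* single, symmetric release path */
	return 0;
}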
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2e8c06108ab9..0f3f1999719a 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
 
 ifneq ($(CONFIG_IPV6),)
 obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
+obj-y += mcast_snoop.o
 endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 37b70e82bff8..21c2c818df3b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
 	if (!fn)
 		goto out;
+
+	noflags |= RTF_CACHE;
 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->dst.dev->ifindex != dev->ifindex)
 			continue;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index eef63b394c5a..7de52b65173f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,7 +167,7 @@ lookup_protocol:
 	WARN_ON(!answer_prot->slab);
 
 	err = -ENOBUFS;
-	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
+	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
 	if (!sk)
 		goto out;
 
@@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		np->saddr = addr->sin6_addr;
 
 	/* Make sure we are allowed to bind here. */
-	if (sk->sk_prot->get_port(sk, snum)) {
+	if ((snum || !inet->bind_address_no_port) &&
+	    sk->sk_prot->get_port(sk, snum)) {
 		inet_reset_saddr(sk);
 		err = -EADDRINUSE;
 		goto out;
@@ -768,6 +769,7 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.auto_flowlabels = 0;
 	net->ipv6.sysctl.idgen_retries = 3;
 	net->ipv6.sysctl.idgen_delay = 1 * HZ;
+	net->ipv6.sysctl.flowlabel_state_ranges = 1;
 	atomic_set(&net->ipv6.fib6_sernum, 1);
 
 	err = ipv6_init_mibs(net);
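The inet6_bind() change is the IPv6 half of IP_BIND_ADDRESS_NO_PORT: with the option set and sin6_port zero, get_port() is skipped, so the source address is pinned immediately while the ephemeral port is chosen only at connect() time (avoiding premature exhaustion of the port space by sockets bound to distinct addresses). A userspace sketch of the intended calling sequence; the fallback #define is only for older headers and assumes the Linux value:

#include <netinet/in.h>
#include <sys/socket.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

#ifndef IP_BIND_ADDRESS_NO_PORT
#define IP_BIND_ADDRESS_NO_PORT 24	/* linux/in.h */
#endif

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	struct sockaddr_in6 a;

	memset(&a, 0, sizeof(a));
	a.sin6_family = AF_INET6;
	a.sin6_addr = in6addr_loopback;
	a.sin6_port = 0;	/* no port yet: chosen at connect() */

	if (setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)))
		perror("setsockopt");
	if (bind(fd, (struct sockaddr *)&a, sizeof(a)))
		perror("bind");
	close(fd);
	return 0;
}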
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 762a58c772b8..62d908e64eeb 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	kfree_skb(skb);
 }
 
+/* For some errors we have valid addr_offset even with zero payload and
+ * zero port. Also, addr_offset should be supported if port is set.
+ */
+static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
+{
+	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 ||
+	       serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
+}
+
 /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL.
  *
  * At one point, excluding local errors was a quick test to identify icmp/icmp6
@@ -389,7 +399,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && serr->port) {
+	if (sin && ipv6_datagram_support_addr(serr)) {
 		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
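ipv6_recv_error() previously reported an offender address only when serr->port was non-zero, which dropped the address for errors carried with a zero port. The new predicate widens the condition to the listed origins. A compact standalone restatement; the origin constants mirror linux/errqueue.h values and the helper is illustrative:

#include <stdbool.h>
#include <stdio.h>

/* Values as in linux/errqueue.h */
#define SO_EE_ORIGIN_LOCAL 1
#define SO_EE_ORIGIN_ICMP  2
#define SO_EE_ORIGIN_ICMP6 3

static bool support_addr(int origin, unsigned short port)
{
	return origin == SO_EE_ORIGIN_ICMP6 ||
	       origin == SO_EE_ORIGIN_ICMP ||
	       origin == SO_EE_ORIGIN_LOCAL || port;
}

int main(void)
{
	/* The old "port != 0" test rejected both of these: */
	printf("%d\n", support_addr(SO_EE_ORIGIN_ICMP6, 0));	/* 1 */
	printf("%d\n", support_addr(SO_EE_ORIGIN_LOCAL, 0));	/* 1 */
	return 0;
}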
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7c07ce36aae2..060a60b2f8a6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -76,7 +76,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
 		len = ALIGN(len, crypto_tfm_ctx_alignment());
 	}
 
-	len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+	len += sizeof(struct aead_request) + crypto_aead_reqsize(aead);
 	len = ALIGN(len, __alignof__(struct scatterlist));
 
 	len += sizeof(struct scatterlist) * nfrags;
@@ -96,17 +96,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
 			     crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
 }
 
-static inline struct aead_givcrypt_request *esp_tmp_givreq(
-	struct crypto_aead *aead, u8 *iv)
-{
-	struct aead_givcrypt_request *req;
-
-	req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
-				crypto_tfm_ctx_alignment());
-	aead_givcrypt_set_tfm(req, aead);
-	return req;
-}
-
 static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
 {
 	struct aead_request *req;
@@ -125,14 +114,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
 			     __alignof__(struct scatterlist));
 }
 
-static inline struct scatterlist *esp_givreq_sg(
-	struct crypto_aead *aead, struct aead_givcrypt_request *req)
-{
-	return (void *)ALIGN((unsigned long)(req + 1) +
-			     crypto_aead_reqsize(aead),
-			     __alignof__(struct scatterlist));
-}
-
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -141,32 +122,57 @@ static void esp_output_done(struct crypto_async_request *base, int err)
 	xfrm_output_resume(skb, err);
 }
 
+/* Move ESP header back into place. */
+static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
+{
+	struct ip_esp_hdr *esph = (void *)(skb->data + offset);
+	void *tmp = ESP_SKB_CB(skb)->tmp;
+	__be32 *seqhi = esp_tmp_seqhi(tmp);
+
+	esph->seq_no = esph->spi;
+	esph->spi = *seqhi;
+}
+
+static void esp_output_restore_header(struct sk_buff *skb)
+{
+	esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+}
+
+static void esp_output_done_esn(struct crypto_async_request *base, int err)
+{
+	struct sk_buff *skb = base->data;
+
+	esp_output_restore_header(skb);
+	esp_output_done(base, err);
+}
+
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
-	struct aead_givcrypt_request *req;
+	struct aead_request *req;
 	struct scatterlist *sg;
-	struct scatterlist *asg;
 	struct sk_buff *trailer;
 	void *tmp;
 	int blksize;
 	int clen;
 	int alen;
 	int plen;
+	int ivlen;
 	int tfclen;
 	int nfrags;
 	int assoclen;
-	int sglists;
 	int seqhilen;
 	u8 *iv;
 	u8 *tail;
 	__be32 *seqhi;
+	__be64 seqno;
 
 	/* skb is pure payload to encrypt */
 	aead = x->data;
 	alen = crypto_aead_authsize(aead);
+	ivlen = crypto_aead_ivsize(aead);
 
 	tfclen = 0;
 	if (x->tfcpad) {
@@ -187,16 +193,14 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	nfrags = err;
 
 	assoclen = sizeof(*esph);
-	sglists = 1;
 	seqhilen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
-		sglists += 2;
 		seqhilen += sizeof(__be32);
 		assoclen += seqhilen;
 	}
 
-	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
 	if (!tmp) {
 		err = -ENOMEM;
 		goto error;
@@ -204,9 +208,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	seqhi = esp_tmp_seqhi(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
-	req = esp_tmp_givreq(aead, iv);
-	asg = esp_givreq_sg(aead, req);
-	sg = asg + sglists;
+	req = esp_tmp_req(aead, iv);
+	sg = esp_req_sg(aead, req);
 
 	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
@@ -227,37 +230,53 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph = ip_esp_hdr(skb);
 	*skb_mac_header(skb) = IPPROTO_ESP;
 
-	esph->spi = x->id.spi;
 	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
+	aead_request_set_callback(req, 0, esp_output_done, skb);
+
+	/* For ESN we move the header forward by 4 bytes to
+	 * accommodate the high bits. We will move it back after
+	 * encryption.
+	 */
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+		*seqhi = esph->spi;
+		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+	}
+
+	esph->spi = x->id.spi;
+
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
-		     esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
-		     clen + alen);
+		     (unsigned char *)esph - skb->data,
+		     assoclen + ivlen + clen + alen);
 
-	if ((x->props.flags & XFRM_STATE_ESN)) {
-		sg_init_table(asg, 3);
-		sg_set_buf(asg, &esph->spi, sizeof(__be32));
-		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
-		sg_set_buf(asg + 1, seqhi, seqhilen);
-		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
-	} else
-		sg_init_one(asg, esph, sizeof(*esph));
-
-	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
-	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
-	aead_givcrypt_set_assoc(req, asg, assoclen);
-	aead_givcrypt_set_giv(req, esph->enc_data,
-			      XFRM_SKB_CB(skb)->seq.output.low +
-			      ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+	aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+	aead_request_set_ad(req, assoclen);
+
+	seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+			    ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+	memset(iv, 0, ivlen);
+	memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+	       min(ivlen, 8));
 
 	ESP_SKB_CB(skb)->tmp = tmp;
-	err = crypto_aead_givencrypt(req);
-	if (err == -EINPROGRESS)
+	err = crypto_aead_encrypt(req);
+
+	switch (err) {
+	case -EINPROGRESS:
 		goto error;
 
-	if (err == -EBUSY)
+	case -EBUSY:
 		err = NET_XMIT_DROP;
+		break;
+
+	case 0:
+		if ((x->props.flags & XFRM_STATE_ESN))
+			esp_output_restore_header(skb);
+	}
 
 	kfree(tmp);
 
@@ -318,25 +337,38 @@ static void esp_input_done(struct crypto_async_request *base, int err)
 	xfrm_input_resume(skb, esp_input_done2(skb, err));
 }
 
+static void esp_input_restore_header(struct sk_buff *skb)
+{
+	esp_restore_header(skb, 0);
+	__skb_pull(skb, 4);
+}
+
+static void esp_input_done_esn(struct crypto_async_request *base, int err)
+{
+	struct sk_buff *skb = base->data;
+
+	esp_input_restore_header(skb);
+	esp_input_done(base, err);
+}
+
 static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead = x->data;
 	struct aead_request *req;
 	struct sk_buff *trailer;
-	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+	int ivlen = crypto_aead_ivsize(aead);
+	int elen = skb->len - sizeof(*esph) - ivlen;
 	int nfrags;
 	int assoclen;
-	int sglists;
 	int seqhilen;
 	int ret = 0;
 	void *tmp;
 	__be32 *seqhi;
 	u8 *iv;
 	struct scatterlist *sg;
-	struct scatterlist *asg;
 
-	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) {
+	if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -355,16 +387,14 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	ret = -ENOMEM;
 
 	assoclen = sizeof(*esph);
-	sglists = 1;
 	seqhilen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
-		sglists += 2;
 		seqhilen += sizeof(__be32);
 		assoclen += seqhilen;
 	}
 
-	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
 	if (!tmp)
 		goto out;
 
@@ -372,36 +402,39 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	seqhi = esp_tmp_seqhi(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
-	asg = esp_req_sg(aead, req);
-	sg = asg + sglists;
+	sg = esp_req_sg(aead, req);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
 	esph = (struct ip_esp_hdr *)skb->data;
 
-	/* Get ivec. This can be wrong, check against another impls. */
-	iv = esph->enc_data;
-
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+	aead_request_set_callback(req, 0, esp_input_done, skb);
 
+	/* For ESN we move the header forward by 4 bytes to
+	 * accommodate the high bits. We will move it back after
+	 * decryption.
+	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-		sg_init_table(asg, 3);
-		sg_set_buf(asg, &esph->spi, sizeof(__be32));
-		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
-		sg_set_buf(asg + 1, seqhi, seqhilen);
-		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
-	} else
-		sg_init_one(asg, esph, sizeof(*esph));
+		esph = (void *)skb_push(skb, 4);
+		*seqhi = esph->spi;
+		esph->spi = esph->seq_no;
+		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+		aead_request_set_callback(req, 0, esp_input_done_esn, skb);
+	}
 
-	aead_request_set_callback(req, 0, esp_input_done, skb);
-	aead_request_set_crypt(req, sg, sg, elen, iv);
-	aead_request_set_assoc(req, asg, assoclen);
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
+
+	aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
+	aead_request_set_ad(req, assoclen);
 
 	ret = crypto_aead_decrypt(req);
 	if (ret == -EINPROGRESS)
 		goto out;
 
+	if ((x->props.flags & XFRM_STATE_ESN))
+		esp_input_restore_header(skb);
+
 	ret = esp_input_done2(skb, ret);
 
 out:
@@ -461,10 +494,16 @@ static void esp6_destroy(struct xfrm_state *x)
 
 static int esp_init_aead(struct xfrm_state *x)
 {
+	char aead_name[CRYPTO_MAX_ALG_NAME];
 	struct crypto_aead *aead;
 	int err;
 
-	aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+	err = -ENAMETOOLONG;
+	if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
+		goto error;
+
+	aead = crypto_alloc_aead(aead_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
@@ -503,15 +542,19 @@ static int esp_init_authenc(struct xfrm_state *x)
 
 	if ((x->props.flags & XFRM_STATE_ESN)) {
 		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
-			     "authencesn(%s,%s)",
+			     "%s%sauthencesn(%s,%s)%s",
+			     x->geniv ?: "", x->geniv ? "(" : "",
 			     x->aalg ? x->aalg->alg_name : "digest_null",
-			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			     x->ealg->alg_name,
+			     x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
 			goto error;
 	} else {
 		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
-			     "authenc(%s,%s)",
+			     "%s%sauthenc(%s,%s)%s",
+			     x->geniv ?: "", x->geniv ? "(" : "",
 			     x->aalg ? x->aalg->alg_name : "digest_null",
-			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			     x->ealg->alg_name,
+			     x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
 			goto error;
 	}
 
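With the old givcrypt interface gone, esp6 now builds the explicit IV itself: the 64-bit ESN sequence number is converted to big-endian and its tail bytes are copied into the tail of the IV buffer, which for a common 8-byte explicit IV fills it completely. A standalone sketch of just that IV construction; the function name is illustrative and glibc's htobe64 stands in for cpu_to_be64:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static void esp_build_iv(uint8_t *iv, size_t ivlen, uint32_t lo, uint32_t hi)
{
	uint64_t seqno = htobe64(((uint64_t)hi << 32) + lo);
	size_t n = MIN(ivlen, sizeof(seqno));

	memset(iv, 0, ivlen);
	memcpy(iv + ivlen - n, (const uint8_t *)&seqno + sizeof(seqno) - n, n);
}

int main(void)
{
	uint8_t iv[8];
	size_t i;

	esp_build_iv(iv, sizeof(iv), 0x01020304, 0x0a0b0c0d);
	for (i = 0; i < sizeof(iv); i++)
		printf("%02x", iv[i]);	/* 0a0b0c0d01020304 */
	printf("\n");
	return 0;
}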
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2c2b5d51f15c..713d7434c911 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -207,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		struct inet_peer *peer;
 
 		peer = inet_getpeer_v6(net->ipv6.peers,
-				       &rt->rt6i_dst.addr, 1);
+				       &fl6->daddr, 1);
 		res = inet_peer_xrlim_allow(peer, tmo);
 		if (peer)
 			inet_putpeer(peer);
@@ -337,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
 	 * We won't send icmp if the destination is known
 	 * anycast.
 	 */
-	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
 		net_dbg_ratelimited("icmp6_send: acast source\n");
 		dst_release(dst);
 		return ERR_PTR(-EINVAL);
@@ -564,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	if (!ipv6_unicast_destination(skb) &&
 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
-	      ipv6_anycast_destination(skb)))
+	      ipv6_anycast_destination(skb_dst(skb), saddr)))
 		saddr = NULL;
 
 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 871641bc1ed4..b4fd96de97e6 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -257,7 +257,7 @@ not_unique:
 	return -EADDRNOTAVAIL;
 }
 
-static inline u32 inet6_sk_port_offset(const struct sock *sk)
+static u32 inet6_sk_port_offset(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 
@@ -269,7 +269,11 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		       struct sock *sk)
 {
-	return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
+	u32 port_offset = 0;
+
+	if (!inet_sk(sk)->inet_num)
+		port_offset = inet6_sk_port_offset(sk);
+	return __inet_hash_connect(death_row, sk, port_offset,
 				   __inet6_check_established);
 }
 EXPORT_SYMBOL_GPL(inet6_hash_connect);
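inet6_hash_connect() now passes a zero port offset whenever the socket already holds a local port (inet_num != 0): the offset only matters when __inet_hash_connect() actually has to search for an ephemeral port, and a bound socket keeps the port it has. The guard restated standalone, with illustrative types:

#include <stdint.h>
#include <stdio.h>

struct fake_sk { uint16_t inet_num; };	/* bound local port, 0 if none */

static uint32_t port_offset_for(const struct fake_sk *sk, uint32_t hashed)
{
	return sk->inet_num ? 0 : hashed;	/* skip the hash when bound */
}

int main(void)
{
	struct fake_sk bound = { .inet_num = 8080 }, unbound = { 0 };

	printf("%u %u\n", port_offset_for(&bound, 0xdeadbeef),
	       port_offset_for(&unbound, 0xdeadbeef));
	return 0;
}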
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bde57b113009..55d19861ab20 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn)
 	kmem_cache_free(fib6_node_kmem, fn);
 }
 
+static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+{
+	int cpu;
+
+	if (!non_pcpu_rt->rt6i_pcpu)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct rt6_info **ppcpu_rt;
+		struct rt6_info *pcpu_rt;
+
+		ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
+		pcpu_rt = *ppcpu_rt;
+		if (pcpu_rt) {
+			dst_free(&pcpu_rt->dst);
+			*ppcpu_rt = NULL;
+		}
+	}
+}
+
 static void rt6_release(struct rt6_info *rt)
 {
-	if (atomic_dec_and_test(&rt->rt6i_ref))
+	if (atomic_dec_and_test(&rt->rt6i_ref)) {
+		rt6_free_pcpu(rt);
 		dst_free(&rt->dst);
+	}
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -738,6 +760,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 				rt6_clean_expires(iter);
 			else
 				rt6_set_expires(iter, rt->dst.expires);
+			iter->rt6i_pmtu = rt->rt6i_pmtu;
 			return -EEXIST;
 		}
 		/* If we have the same destination and the same metric,
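rt6_free_pcpu() has to drop every per-cpu cached clone before the parent route's last reference goes away. The loop pattern, walking each possible CPU's slot, freeing what is there and clearing the pointer so a second walk is harmless, sketched standalone with an array in place of the kernel's per-cpu area:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

struct route { int id; };

/* Illustrative stand-in for rt6_free_pcpu(): one cached clone per CPU. */
static void free_pcpu_routes(struct route *pcpu_rt[NR_CPUS])
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (pcpu_rt[cpu]) {
			free(pcpu_rt[cpu]);
			pcpu_rt[cpu] = NULL;	/* safe to run twice */
		}
	}
}

int main(void)
{
	struct route *cache[NR_CPUS] = { 0 };

	cache[2] = malloc(sizeof(*cache[2]));
	free_pcpu_routes(cache);
	free_pcpu_routes(cache);	/* idempotent: nothing left to free */
	printf("done\n");
	return 0;
}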
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index d491125011c4..1f9ebe3cbb4a 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
 			return -EINVAL;
 
+		if (net->ipv6.sysctl.flowlabel_state_ranges &&
+		    (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
+			return -ERANGE;
+
 		fl = fl_create(net, sk, &freq, optval, optlen, &err);
 		if (!fl)
 			return err;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f2e464eba5ef..57990c929cd8 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb)
 			if (offset < 0)
 				goto out;
 
-			if (!ipv6_is_mld(skb, nexthdr, offset))
-				goto out;
+			if (ipv6_is_mld(skb, nexthdr, offset))
+				deliver = true;
 
-			deliver = true;
+			goto out;
 		}
 		/* unknown RA - process it normally */
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bc09cb97b840..d5f7716662db 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	nexthop = rt6_nexthop((struct rt6_info *)dst);
+	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 	if (unlikely(!neigh))
 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -459,7 +459,7 @@ int ip6_forward(struct sk_buff *skb)
 	else
 		target = &hdr->daddr;
 
-	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+	peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 
 	/* Limit redirects both by destination (here)
 	   and by source (inside ndisc_send_redirect)
@@ -551,7 +551,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 	struct frag_hdr *fh;
 	unsigned int mtu, hlen, left, len;
 	int hroom, troom;
-	__be32 frag_id = 0;
+	__be32 frag_id;
 	int ptr, offset = 0, err = 0;
 	u8 *prevhdr, nexthdr = 0;
 	struct net *net = dev_net(skb_dst(skb)->dev);
@@ -564,18 +564,17 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 	/* We must not fragment if the socket is set to force MTU discovery
 	 * or if the skb is not generated by a local socket.
 	 */
-	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
-		     (IP6CB(skb)->frag_max_size &&
-		      IP6CB(skb)->frag_max_size > mtu)) {
-		if (skb->sk && dst_allfrag(skb_dst(skb)))
-			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+	if (unlikely(!skb->ignore_df && skb->len > mtu))
+		goto fail_toobig;
 
-		skb->dev = skb_dst(skb)->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-			      IPSTATS_MIB_FRAGFAILS);
-		kfree_skb(skb);
-		return -EMSGSIZE;
+	if (IP6CB(skb)->frag_max_size) {
+		if (IP6CB(skb)->frag_max_size > mtu)
+			goto fail_toobig;
+
+		/* don't send fragments larger than what we received */
+		mtu = IP6CB(skb)->frag_max_size;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
 	}
 
 	if (np && np->frag_size < mtu) {
@@ -584,6 +583,9 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 	}
 	mtu -= hlen + sizeof(struct frag_hdr);
 
+	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+				    &ipv6_hdr(skb)->saddr);
+
 	if (skb_has_frag_list(skb)) {
 		int first_len = skb_pagelen(skb);
 		struct sk_buff *frag2;
@@ -632,11 +634,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 		skb_reset_network_header(skb);
 		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 
-		ipv6_select_ident(net, fh, rt);
 		fh->nexthdr = nexthdr;
 		fh->reserved = 0;
 		fh->frag_off = htons(IP6_MF);
-		frag_id = fh->identification;
+		fh->identification = frag_id;
 
 		first_len = skb_pagelen(skb);
 		skb->data_len = first_len - skb_headlen(skb);
@@ -778,11 +779,7 @@ slow_path:
 		 */
 		fh->nexthdr = nexthdr;
 		fh->reserved = 0;
-		if (!frag_id) {
-			ipv6_select_ident(net, fh, rt);
-			frag_id = fh->identification;
-		} else
-			fh->identification = frag_id;
+		fh->identification = frag_id;
 
 		/*
 		 * Copy a block of the IP datagram.
@@ -815,6 +812,14 @@ slow_path:
 	consume_skb(skb);
 	return err;
 
+fail_toobig:
+	if (skb->sk && dst_allfrag(skb_dst(skb)))
+		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+
+	skb->dev = skb_dst(skb)->dev;
+	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+	err = -EMSGSIZE;
+
 fail:
 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 		      IPSTATS_MIB_FRAGFAILS);
@@ -936,7 +941,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	 */
 	rt = (struct rt6_info *) *dst;
 	rcu_read_lock_bh();
-	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
+	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
+				      rt6_nexthop(rt, &fl6->daddr));
 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
 	rcu_read_unlock_bh();
 
@@ -1060,11 +1066,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 			    int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
 			int transhdrlen, int mtu, unsigned int flags,
-			struct rt6_info *rt)
+			const struct flowi6 *fl6)
 
 {
 	struct sk_buff *skb;
-	struct frag_hdr fhdr;
 	int err;
 
 	/* There is support for UDP large send offload by network
@@ -1106,8 +1111,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
 				     sizeof(struct frag_hdr)) & ~7;
 	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-	ipv6_select_ident(sock_net(sk), &fhdr, rt);
-	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
+							 &fl6->daddr,
+							 &fl6->saddr);
 
 append:
 	return skb_append_datato_frags(sk, skb, getfrag, from,
@@ -1332,7 +1338,7 @@ emsgsize:
 	    (sk->sk_type == SOCK_DGRAM)) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
 		if (err)
 			goto error;
 		return 0;
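ip6_fragment() now computes the fragment ID once, up front, from the packet's address pair, and stamps the same ID on every fragment in both the fast (frag-list) and slow paths, instead of letting the first generated fragment header produce it as a side effect. The invariant sketched standalone; a toy hash stands in for ipv6_select_ident(), which is not reproduced here:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for ipv6_select_ident(): any per-packet value
 * derived from the address pair works for the sketch. */
static uint32_t select_ident(uint32_t daddr_hash, uint32_t saddr_hash)
{
	return daddr_hash * 2654435761u ^ saddr_hash;
}

int main(void)
{
	uint32_t frag_id = select_ident(0xfe80aabb, 0xfe80ccdd);
	int i;

	/* every fragment of one datagram carries the same identification */
	for (i = 0; i < 3; i++)
		printf("frag %d: id=0x%08x\n", i, frag_id);
	return 0;
}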
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5cafd92c2312..2e67b660118b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
 void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *) dst;
-	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+	t->dst_cookie = rt6_get_cookie(rt);
 	dst_release(t->dst_cache);
 	t->dst_cache = dst;
 }
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index bba8903e871f..e1a1136bda7c 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 	int err;
 	struct socket *sock = NULL;
 
-	err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+	err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock);
 	if (err < 0)
 		goto error;
 
-	sk_change_net(sock->sk, net);
-
 	udp6_addr.sin6_family = AF_INET6;
 	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
 	       sizeof(udp6_addr.sin6_addr));
@@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 error:
 	if (sock) {
 		kernel_sock_shutdown(sock, SHUT_RDWR);
-		sk_release_kernel(sock->sk);
+		sock_release(sock);
 	}
 	*sockp = NULL;
 	return err;
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
new file mode 100644
index 000000000000..df8afe5ab31e
--- /dev/null
+++ b/net/ipv6/mcast_snoop.c
@@ -0,0 +1,213 @@
+/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki.
+ */
+
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+
+static int ipv6_mc_check_ip6hdr(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6h;
+	unsigned int len;
+	unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h);
+
+	if (!pskb_may_pull(skb, offset))
+		return -EINVAL;
+
+	ip6h = ipv6_hdr(skb);
+
+	if (ip6h->version != 6)
+		return -EINVAL;
+
+	len = offset + ntohs(ip6h->payload_len);
+	if (skb->len < len || len <= offset)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ipv6_mc_check_exthdrs(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6h;
+	int offset;
+	u8 nexthdr;
+	__be16 frag_off;
+
+	ip6h = ipv6_hdr(skb);
+
+	if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+		return -ENOMSG;
+
+	nexthdr = ip6h->nexthdr;
+	offset = skb_network_offset(skb) + sizeof(*ip6h);
+	offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+
+	if (offset < 0)
+		return -EINVAL;
+
+	if (nexthdr != IPPROTO_ICMPV6)
+		return -ENOMSG;
+
+	skb_set_transport_header(skb, offset);
+
+	return 0;
+}
+
+static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb)
+{
+	unsigned int len = skb_transport_offset(skb);
+
+	len += sizeof(struct mld2_report);
+
+	return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+}
+
+static int ipv6_mc_check_mld_query(struct sk_buff *skb)
+{
+	struct mld_msg *mld;
+	unsigned int len = skb_transport_offset(skb);
+
+	/* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+		return -EINVAL;
+
+	len += sizeof(struct mld_msg);
+	if (skb->len < len)
+		return -EINVAL;
+
+	/* MLDv1? */
+	if (skb->len != len) {
+		/* or MLDv2? */
+		len += sizeof(struct mld2_query) - sizeof(struct mld_msg);
+		if (skb->len < len || !pskb_may_pull(skb, len))
+			return -EINVAL;
+	}
+
+	mld = (struct mld_msg *)skb_transport_header(skb);
+
+	/* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
+	 * all-nodes destination address (ff02::1) for general queries
+	 */
+	if (ipv6_addr_any(&mld->mld_mca) &&
+	    !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
+{
+	struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb);
+
+	switch (mld->mld_type) {
+	case ICMPV6_MGM_REDUCTION:
+	case ICMPV6_MGM_REPORT:
+		/* fall through */
+		return 0;
+	case ICMPV6_MLD2_REPORT:
+		return ipv6_mc_check_mld_reportv2(skb);
+	case ICMPV6_MGM_QUERY:
+		return ipv6_mc_check_mld_query(skb);
+	default:
+		return -ENOMSG;
+	}
+}
+
+static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
+{
+	return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
+}
+
+static int __ipv6_mc_check_mld(struct sk_buff *skb,
+			       struct sk_buff **skb_trimmed)
+
+{
+	struct sk_buff *skb_chk = NULL;
+	unsigned int transport_len;
+	unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
+	int ret;
+
+	transport_len = ntohs(ipv6_hdr(skb)->payload_len);
149 transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
150
151 skb_get(skb);
152 skb_chk = skb_checksum_trimmed(skb, transport_len,
153 ipv6_mc_validate_checksum);
154 if (!skb_chk)
155 return -EINVAL;
156
157 if (!pskb_may_pull(skb_chk, len)) {
158 kfree_skb(skb_chk);
159 return -EINVAL;
160 }
161
162 ret = ipv6_mc_check_mld_msg(skb_chk);
163 if (ret) {
164 kfree_skb(skb_chk);
165 return ret;
166 }
167
168 if (skb_trimmed)
169 *skb_trimmed = skb_chk;
170 else
171 kfree_skb(skb_chk);
172
173 return 0;
174}
175
176/**
177 * ipv6_mc_check_mld - checks whether this is a sane MLD packet
178 * @skb: the skb to validate
179 * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
180 *
181 * Checks whether an IPv6 packet is a valid MLD packet. If so sets
182 * skb network and transport headers accordingly and returns zero.
183 *
184 * -EINVAL: A broken packet was detected, i.e. it violates some internet
185 * standard
186 * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
187 * -ENOMEM: A memory allocation failure happened.
188 *
189 * Optionally, an skb pointer might be provided via skb_trimmed (or set it
190 * to NULL): After parsing an MLD packet successfully it will point to
191 * an skb which has its tail aligned to the IP packet end. This might
192 * either be the originally provided skb or a trimmed, cloned version if
193 * the skb frame had data beyond the IP packet. A cloned skb allows us
194 * to leave the original skb and its full frame unchanged (which might be
195 * desirable for layer 2 frame jugglers).
196 *
197 * The caller needs to release a reference count from any returned skb_trimmed.
198 */
199int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
200{
201 int ret;
202
203 ret = ipv6_mc_check_ip6hdr(skb);
204 if (ret < 0)
205 return ret;
206
207 ret = ipv6_mc_check_exthdrs(skb);
208 if (ret < 0)
209 return ret;
210
211 return __ipv6_mc_check_mld(skb, skb_trimmed);
212}
213EXPORT_SYMBOL(ipv6_mc_check_mld);
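
A hypothetical caller of the new helper could look like the sketch below; example_snoop() and the parsing placeholder are illustrative, only ipv6_mc_check_mld() itself comes from the file above.

#include <linux/skbuff.h>
#include <net/addrconf.h>

static int example_snoop(struct sk_buff *skb)
{
    struct sk_buff *skb_trimmed = NULL;
    int err;

    err = ipv6_mc_check_mld(skb, &skb_trimmed);
    if (err)    /* -EINVAL, -ENOMSG or -ENOMEM, per the kernel-doc */
        return err;

    /* ... inspect the MLD message via skb_transport_header(skb_trimmed),
     * leaving the original frame and any trailing padding untouched ... */

    /* The caller owns one reference on skb_trimmed, whether it is the
     * original skb or a trimmed clone, and must drop it. */
    kfree_skb(skb_trimmed);
    return 0;
}
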
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 96f153c0846b..0a05b35a90fc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1506,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1506 "Redirect: destination is not a neighbour\n"); 1506 "Redirect: destination is not a neighbour\n");
1507 goto release; 1507 goto release;
1508 } 1508 }
1509 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 1509 peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
1510 ret = inet_peer_xrlim_allow(peer, 1*HZ); 1510 ret = inet_peer_xrlim_allow(peer, 1*HZ);
1511 if (peer) 1511 if (peer)
1512 inet_putpeer(peer); 1512 inet_putpeer(peer);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d958718b5031..b4de08a83e0b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -191,6 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
191 191
192static const struct nf_ipv6_ops ipv6ops = { 192static const struct nf_ipv6_ops ipv6ops = {
193 .chk_addr = ipv6_chk_addr, 193 .chk_addr = ipv6_chk_addr,
194 .route_input = ip6_route_input,
195 .fragment = ip6_fragment
194}; 196};
195 197
196static const struct nf_afinfo nf_ip6_afinfo = { 198static const struct nf_afinfo nf_ip6_afinfo = {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ca6998345b42..b552cf0d6198 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -186,7 +186,8 @@ config IP6_NF_MATCH_MH
186 186
187config IP6_NF_MATCH_RPFILTER 187config IP6_NF_MATCH_RPFILTER
188 tristate '"rpfilter" reverse path filter match support' 188 tristate '"rpfilter" reverse path filter match support'
189 depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW) 189 depends on NETFILTER_ADVANCED
190 depends on IP6_NF_MANGLE || IP6_NF_RAW
190 ---help--- 191 ---help---
191 This option allows you to match packets whose replies would 192 This option allows you to match packets whose replies would
192 go out via the interface the packet came in. 193 go out via the interface the packet came in.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62f5b0d0bc9b..3c35ced39b42 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb,
283 const struct xt_table_info *private, 283 const struct xt_table_info *private,
284 const struct ip6t_entry *e) 284 const struct ip6t_entry *e)
285{ 285{
286 const void *table_base;
287 const struct ip6t_entry *root; 286 const struct ip6t_entry *root;
288 const char *hookname, *chainname, *comment; 287 const char *hookname, *chainname, *comment;
289 const struct ip6t_entry *iter; 288 const struct ip6t_entry *iter;
290 unsigned int rulenum = 0; 289 unsigned int rulenum = 0;
291 struct net *net = dev_net(in ? in : out); 290 struct net *net = dev_net(in ? in : out);
292 291
293 table_base = private->entries[smp_processor_id()]; 292 root = get_entry(private->entries, private->hook_entry[hook]);
294 root = get_entry(table_base, private->hook_entry[hook]);
295 293
296 hookname = chainname = hooknames[hook]; 294 hookname = chainname = hooknames[hook];
297 comment = comments[NF_IP6_TRACE_COMMENT_RULE]; 295 comment = comments[NF_IP6_TRACE_COMMENT_RULE];
@@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb,
357 */ 355 */
358 smp_read_barrier_depends(); 356 smp_read_barrier_depends();
359 cpu = smp_processor_id(); 357 cpu = smp_processor_id();
360 table_base = private->entries[cpu]; 358 table_base = private->entries;
361 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; 359 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
362 stackptr = per_cpu_ptr(private->stackptr, cpu); 360 stackptr = per_cpu_ptr(private->stackptr, cpu);
363 origptr = *stackptr; 361 origptr = *stackptr;
@@ -367,6 +365,7 @@ ip6t_do_table(struct sk_buff *skb,
367 do { 365 do {
368 const struct xt_entry_target *t; 366 const struct xt_entry_target *t;
369 const struct xt_entry_match *ematch; 367 const struct xt_entry_match *ematch;
368 struct xt_counters *counter;
370 369
371 IP_NF_ASSERT(e); 370 IP_NF_ASSERT(e);
372 acpar.thoff = 0; 371 acpar.thoff = 0;
@@ -384,7 +383,8 @@ ip6t_do_table(struct sk_buff *skb,
384 goto no_match; 383 goto no_match;
385 } 384 }
386 385
387 ADD_COUNTER(e->counters, skb->len, 1); 386 counter = xt_get_this_cpu_counter(&e->counters);
387 ADD_COUNTER(*counter, skb->len, 1);
388 388
389 t = ip6t_get_target_c(e); 389 t = ip6t_get_target_c(e);
390 IP_NF_ASSERT(t->u.kernel.target); 390 IP_NF_ASSERT(t->u.kernel.target);
@@ -679,6 +679,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
679 if (ret) 679 if (ret)
680 return ret; 680 return ret;
681 681
682 e->counters.pcnt = xt_percpu_counter_alloc();
683 if (IS_ERR_VALUE(e->counters.pcnt))
684 return -ENOMEM;
685
682 j = 0; 686 j = 0;
683 mtpar.net = net; 687 mtpar.net = net;
684 mtpar.table = name; 688 mtpar.table = name;
@@ -714,6 +718,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
714 break; 718 break;
715 cleanup_match(ematch, net); 719 cleanup_match(ematch, net);
716 } 720 }
721
722 xt_percpu_counter_free(e->counters.pcnt);
723
717 return ret; 724 return ret;
718} 725}
719 726
@@ -797,6 +804,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
797 if (par.target->destroy != NULL) 804 if (par.target->destroy != NULL)
798 par.target->destroy(&par); 805 par.target->destroy(&par);
799 module_put(par.target->me); 806 module_put(par.target->me);
807
808 xt_percpu_counter_free(e->counters.pcnt);
800} 809}
801 810
802/* Checks and translates the user-supplied table segment (held in 811/* Checks and translates the user-supplied table segment (held in
@@ -879,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
879 return ret; 888 return ret;
880 } 889 }
881 890
882 /* And one copy for every other CPU */
883 for_each_possible_cpu(i) {
884 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
885 memcpy(newinfo->entries[i], entry0, newinfo->size);
886 }
887
888 return ret; 891 return ret;
889} 892}
890 893
@@ -900,14 +903,16 @@ get_counters(const struct xt_table_info *t,
900 seqcount_t *s = &per_cpu(xt_recseq, cpu); 903 seqcount_t *s = &per_cpu(xt_recseq, cpu);
901 904
902 i = 0; 905 i = 0;
903 xt_entry_foreach(iter, t->entries[cpu], t->size) { 906 xt_entry_foreach(iter, t->entries, t->size) {
907 struct xt_counters *tmp;
904 u64 bcnt, pcnt; 908 u64 bcnt, pcnt;
905 unsigned int start; 909 unsigned int start;
906 910
911 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
907 do { 912 do {
908 start = read_seqcount_begin(s); 913 start = read_seqcount_begin(s);
909 bcnt = iter->counters.bcnt; 914 bcnt = tmp->bcnt;
910 pcnt = iter->counters.pcnt; 915 pcnt = tmp->pcnt;
911 } while (read_seqcount_retry(s, start)); 916 } while (read_seqcount_retry(s, start));
912 917
913 ADD_COUNTER(counters[i], bcnt, pcnt); 918 ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -952,11 +957,7 @@ copy_entries_to_user(unsigned int total_size,
952 if (IS_ERR(counters)) 957 if (IS_ERR(counters))
953 return PTR_ERR(counters); 958 return PTR_ERR(counters);
954 959
955 /* choose the copy that is on our node/cpu, ... 960 loc_cpu_entry = private->entries;
956 * This choice is lazy (because current thread is
957 * allowed to migrate to another cpu)
958 */
959 loc_cpu_entry = private->entries[raw_smp_processor_id()];
960 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 961 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
961 ret = -EFAULT; 962 ret = -EFAULT;
962 goto free_counters; 963 goto free_counters;
@@ -1064,16 +1065,16 @@ static int compat_table_info(const struct xt_table_info *info,
1064 struct xt_table_info *newinfo) 1065 struct xt_table_info *newinfo)
1065{ 1066{
1066 struct ip6t_entry *iter; 1067 struct ip6t_entry *iter;
1067 void *loc_cpu_entry; 1068 const void *loc_cpu_entry;
1068 int ret; 1069 int ret;
1069 1070
1070 if (!newinfo || !info) 1071 if (!newinfo || !info)
1071 return -EINVAL; 1072 return -EINVAL;
1072 1073
1073 /* we dont care about newinfo->entries[] */ 1074 /* we dont care about newinfo->entries */
1074 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1075 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1075 newinfo->initial_entries = 0; 1076 newinfo->initial_entries = 0;
1076 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1077 loc_cpu_entry = info->entries;
1077 xt_compat_init_offsets(AF_INET6, info->number); 1078 xt_compat_init_offsets(AF_INET6, info->number);
1078 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1079 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1079 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1080 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1194,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1194 struct xt_table *t; 1195 struct xt_table *t;
1195 struct xt_table_info *oldinfo; 1196 struct xt_table_info *oldinfo;
1196 struct xt_counters *counters; 1197 struct xt_counters *counters;
1197 const void *loc_cpu_old_entry;
1198 struct ip6t_entry *iter; 1198 struct ip6t_entry *iter;
1199 1199
1200 ret = 0; 1200 ret = 0;
@@ -1237,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1237 get_counters(oldinfo, counters); 1237 get_counters(oldinfo, counters);
1238 1238
1239 /* Decrease module usage counts and free resource */ 1239 /* Decrease module usage counts and free resource */
1240 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1240 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
1241 xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
1242 cleanup_entry(iter, net); 1241 cleanup_entry(iter, net);
1243 1242
1244 xt_free_table_info(oldinfo); 1243 xt_free_table_info(oldinfo);
@@ -1284,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1284 if (!newinfo) 1283 if (!newinfo)
1285 return -ENOMEM; 1284 return -ENOMEM;
1286 1285
1287 /* choose the copy that is on our node/cpu */ 1286 loc_cpu_entry = newinfo->entries;
1288 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1289 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1287 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1290 tmp.size) != 0) { 1288 tmp.size) != 0) {
1291 ret = -EFAULT; 1289 ret = -EFAULT;
@@ -1316,7 +1314,7 @@ static int
1316do_add_counters(struct net *net, const void __user *user, unsigned int len, 1314do_add_counters(struct net *net, const void __user *user, unsigned int len,
1317 int compat) 1315 int compat)
1318{ 1316{
1319 unsigned int i, curcpu; 1317 unsigned int i;
1320 struct xt_counters_info tmp; 1318 struct xt_counters_info tmp;
1321 struct xt_counters *paddc; 1319 struct xt_counters *paddc;
1322 unsigned int num_counters; 1320 unsigned int num_counters;
@@ -1326,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1326 struct xt_table *t; 1324 struct xt_table *t;
1327 const struct xt_table_info *private; 1325 const struct xt_table_info *private;
1328 int ret = 0; 1326 int ret = 0;
1329 const void *loc_cpu_entry;
1330 struct ip6t_entry *iter; 1327 struct ip6t_entry *iter;
1331 unsigned int addend; 1328 unsigned int addend;
1332#ifdef CONFIG_COMPAT 1329#ifdef CONFIG_COMPAT
@@ -1374,7 +1371,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1374 goto free; 1371 goto free;
1375 } 1372 }
1376 1373
1377
1378 local_bh_disable(); 1374 local_bh_disable();
1379 private = t->private; 1375 private = t->private;
1380 if (private->number != num_counters) { 1376 if (private->number != num_counters) {
@@ -1383,16 +1379,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1383 } 1379 }
1384 1380
1385 i = 0; 1381 i = 0;
1386 /* Choose the copy that is on our node */
1387 curcpu = smp_processor_id();
1388 addend = xt_write_recseq_begin(); 1382 addend = xt_write_recseq_begin();
1389 loc_cpu_entry = private->entries[curcpu]; 1383 xt_entry_foreach(iter, private->entries, private->size) {
1390 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1384 struct xt_counters *tmp;
1391 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1385
1386 tmp = xt_get_this_cpu_counter(&iter->counters);
1387 ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
1392 ++i; 1388 ++i;
1393 } 1389 }
1394 xt_write_recseq_end(addend); 1390 xt_write_recseq_end(addend);
1395
1396 unlock_up_free: 1391 unlock_up_free:
1397 local_bh_enable(); 1392 local_bh_enable();
1398 xt_table_unlock(t); 1393 xt_table_unlock(t);
@@ -1459,7 +1454,6 @@ static int
1459compat_find_calc_match(struct xt_entry_match *m, 1454compat_find_calc_match(struct xt_entry_match *m,
1460 const char *name, 1455 const char *name,
1461 const struct ip6t_ip6 *ipv6, 1456 const struct ip6t_ip6 *ipv6,
1462 unsigned int hookmask,
1463 int *size) 1457 int *size)
1464{ 1458{
1465 struct xt_match *match; 1459 struct xt_match *match;
@@ -1528,8 +1522,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1528 entry_offset = (void *)e - (void *)base; 1522 entry_offset = (void *)e - (void *)base;
1529 j = 0; 1523 j = 0;
1530 xt_ematch_foreach(ematch, e) { 1524 xt_ematch_foreach(ematch, e) {
1531 ret = compat_find_calc_match(ematch, name, 1525 ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
1532 &e->ipv6, e->comefrom, &off);
1533 if (ret != 0) 1526 if (ret != 0)
1534 goto release_matches; 1527 goto release_matches;
1535 ++j; 1528 ++j;
@@ -1623,6 +1616,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
1623 struct xt_mtchk_param mtpar; 1616 struct xt_mtchk_param mtpar;
1624 struct xt_entry_match *ematch; 1617 struct xt_entry_match *ematch;
1625 1618
1619 e->counters.pcnt = xt_percpu_counter_alloc();
1620 if (IS_ERR_VALUE(e->counters.pcnt))
1621 return -ENOMEM;
1626 j = 0; 1622 j = 0;
1627 mtpar.net = net; 1623 mtpar.net = net;
1628 mtpar.table = name; 1624 mtpar.table = name;
@@ -1647,6 +1643,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
1647 break; 1643 break;
1648 cleanup_match(ematch, net); 1644 cleanup_match(ematch, net);
1649 } 1645 }
1646
1647 xt_percpu_counter_free(e->counters.pcnt);
1648
1650 return ret; 1649 return ret;
1651} 1650}
1652 1651
@@ -1731,7 +1730,7 @@ translate_compat_table(struct net *net,
1731 newinfo->hook_entry[i] = info->hook_entry[i]; 1730 newinfo->hook_entry[i] = info->hook_entry[i];
1732 newinfo->underflow[i] = info->underflow[i]; 1731 newinfo->underflow[i] = info->underflow[i];
1733 } 1732 }
1734 entry1 = newinfo->entries[raw_smp_processor_id()]; 1733 entry1 = newinfo->entries;
1735 pos = entry1; 1734 pos = entry1;
1736 size = total_size; 1735 size = total_size;
1737 xt_entry_foreach(iter0, entry0, total_size) { 1736 xt_entry_foreach(iter0, entry0, total_size) {
@@ -1783,11 +1782,6 @@ translate_compat_table(struct net *net,
1783 return ret; 1782 return ret;
1784 } 1783 }
1785 1784
1786 /* And one copy for every other CPU */
1787 for_each_possible_cpu(i)
1788 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1789 memcpy(newinfo->entries[i], entry1, newinfo->size);
1790
1791 *pinfo = newinfo; 1785 *pinfo = newinfo;
1792 *pentry0 = entry1; 1786 *pentry0 = entry1;
1793 xt_free_table_info(info); 1787 xt_free_table_info(info);
@@ -1834,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1834 if (!newinfo) 1828 if (!newinfo)
1835 return -ENOMEM; 1829 return -ENOMEM;
1836 1830
1837 /* choose the copy that is on our node/cpu */ 1831 loc_cpu_entry = newinfo->entries;
1838 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1839 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1832 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1840 tmp.size) != 0) { 1833 tmp.size) != 0) {
1841 ret = -EFAULT; 1834 ret = -EFAULT;
@@ -1906,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1906 void __user *pos; 1899 void __user *pos;
1907 unsigned int size; 1900 unsigned int size;
1908 int ret = 0; 1901 int ret = 0;
1909 const void *loc_cpu_entry;
1910 unsigned int i = 0; 1902 unsigned int i = 0;
1911 struct ip6t_entry *iter; 1903 struct ip6t_entry *iter;
1912 1904
@@ -1914,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1914 if (IS_ERR(counters)) 1906 if (IS_ERR(counters))
1915 return PTR_ERR(counters); 1907 return PTR_ERR(counters);
1916 1908
1917 /* choose the copy that is on our node/cpu, ...
1918 * This choice is lazy (because current thread is
1919 * allowed to migrate to another cpu)
1920 */
1921 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1922 pos = userptr; 1909 pos = userptr;
1923 size = total_size; 1910 size = total_size;
1924 xt_entry_foreach(iter, loc_cpu_entry, total_size) { 1911 xt_entry_foreach(iter, private->entries, total_size) {
1925 ret = compat_copy_entry_to_user(iter, &pos, 1912 ret = compat_copy_entry_to_user(iter, &pos,
1926 &size, counters, i++); 1913 &size, counters, i++);
1927 if (ret != 0) 1914 if (ret != 0)
@@ -2096,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net,
2096 goto out; 2083 goto out;
2097 } 2084 }
2098 2085
2099 /* choose the copy on our node/cpu, but dont care about preemption */ 2086 loc_cpu_entry = newinfo->entries;
2100 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2101 memcpy(loc_cpu_entry, repl->entries, repl->size); 2087 memcpy(loc_cpu_entry, repl->entries, repl->size);
2102 2088
2103 ret = translate_table(net, newinfo, loc_cpu_entry, repl); 2089 ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2127,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table)
2127 private = xt_unregister_table(table); 2113 private = xt_unregister_table(table);
2128 2114
2129 /* Decrease module usage counts and free resources */ 2115 /* Decrease module usage counts and free resources */
2130 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 2116 loc_cpu_entry = private->entries;
2131 xt_entry_foreach(iter, loc_cpu_entry, private->size) 2117 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2132 cleanup_entry(iter, net); 2118 cleanup_entry(iter, net);
2133 if (private->number > private->initial_entries) 2119 if (private->number > private->initial_entries)
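
The common thread through the ip6_tables.c hunks above is the removal of the per-CPU copies of the whole rule blob (private->entries[cpu]) in favour of a single shared blob plus per-cpu packet/byte counters: find_check_entry() now allocates e->counters.pcnt with xt_percpu_counter_alloc(), the packet path bumps only the local CPU's pair, and readers fold all CPUs into one total. A rough sketch of the two sides, using only helpers that appear in the diff (the summing loop omits the xt_recseq read-retry that the real get_counters() performs):

/* hot path: touch only this CPU's counter pair */
static void example_count_packet(struct ip6t_entry *e, const struct sk_buff *skb)
{
    struct xt_counters *ctr = xt_get_this_cpu_counter(&e->counters);

    ADD_COUNTER(*ctr, skb->len, 1);
}

/* slow path: fold every CPU's pair into one total for user space */
static void example_sum_counters(struct ip6t_entry *e, u64 *bcnt, u64 *pcnt)
{
    int cpu;

    *bcnt = *pcnt = 0;
    for_each_possible_cpu(cpu) {
        struct xt_counters *tmp = xt_get_per_cpu_counter(&e->counters, cpu);

        *bcnt += tmp->bcnt;
        *pcnt += tmp->pcnt;
    }
}
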
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 85892af57364..928a0fb0b744 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -8,9 +8,11 @@
8#include <net/ip6_fib.h> 8#include <net/ip6_fib.h>
9#include <net/addrconf.h> 9#include <net/addrconf.h>
10#include <net/secure_seq.h> 10#include <net/secure_seq.h>
11#include <linux/netfilter.h>
11 12
12static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, 13static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
13 struct in6_addr *dst, struct in6_addr *src) 14 const struct in6_addr *dst,
15 const struct in6_addr *src)
14{ 16{
15 u32 hash, id; 17 u32 hash, id;
16 18
@@ -60,17 +62,17 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
60} 62}
61EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); 63EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
62 64
63void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, 65__be32 ipv6_select_ident(struct net *net,
64 struct rt6_info *rt) 66 const struct in6_addr *daddr,
67 const struct in6_addr *saddr)
65{ 68{
66 static u32 ip6_idents_hashrnd __read_mostly; 69 static u32 ip6_idents_hashrnd __read_mostly;
67 u32 id; 70 u32 id;
68 71
69 net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); 72 net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
70 73
71 id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, 74 id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
72 &rt->rt6i_src.addr); 75 return htonl(id);
73 fhdr->identification = htonl(id);
74} 76}
75EXPORT_SYMBOL(ipv6_select_ident); 77EXPORT_SYMBOL(ipv6_select_ident);
76 78
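
With the reworked signature the function is a pure flow-id generator; a caller that still emits a fragment header fills it from the return value, and the pre-change behaviour is recovered by passing the route's own addresses:

    fhdr->identification = ipv6_select_ident(net, &rt->rt6i_dst.addr,
                                             &rt->rt6i_src.addr);
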
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8072bd4139b7..ca4700cb26c4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -865,6 +865,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
865 fl6.flowi6_oif = np->ucast_oif; 865 fl6.flowi6_oif = np->ucast_oif;
866 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 866 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
867 867
868 if (inet->hdrincl)
869 fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
870
868 dst = ip6_dst_lookup_flow(sk, &fl6, final_p); 871 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
869 if (IS_ERR(dst)) { 872 if (IS_ERR(dst)) {
870 err = PTR_ERR(dst); 873 err = PTR_ERR(dst);
@@ -1324,13 +1327,7 @@ static struct inet_protosw rawv6_protosw = {
1324 1327
1325int __init rawv6_init(void) 1328int __init rawv6_init(void)
1326{ 1329{
1327 int ret; 1330 return inet6_register_protosw(&rawv6_protosw);
1328
1329 ret = inet6_register_protosw(&rawv6_protosw);
1330 if (ret)
1331 goto out;
1332out:
1333 return ret;
1334} 1331}
1335 1332
1336void rawv6_exit(void) 1333void rawv6_exit(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c73ae5039e46..6090969937f8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -72,8 +72,7 @@ enum rt6_nud_state {
72 RT6_NUD_SUCCEED = 1 72 RT6_NUD_SUCCEED = 1
73}; 73};
74 74
75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 75static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
76 const struct in6_addr *dest);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static unsigned int ip6_default_advmss(const struct dst_entry *dst); 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79static unsigned int ip6_mtu(const struct dst_entry *dst); 78static unsigned int ip6_mtu(const struct dst_entry *dst);
@@ -92,6 +91,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu); 91 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 92static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb); 93 struct sk_buff *skb);
94static void rt6_dst_from_metrics_check(struct rt6_info *rt);
95static int rt6_score_route(struct rt6_info *rt, int oif, int strict); 95static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96 96
97#ifdef CONFIG_IPV6_ROUTE_INFO 97#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -104,65 +104,82 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
104 const struct in6_addr *gwaddr, int ifindex); 104 const struct in6_addr *gwaddr, int ifindex);
105#endif 105#endif
106 106
107static void rt6_bind_peer(struct rt6_info *rt, int create) 107struct uncached_list {
108 spinlock_t lock;
109 struct list_head head;
110};
111
112static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
113
114static void rt6_uncached_list_add(struct rt6_info *rt)
108{ 115{
109 struct inet_peer_base *base; 116 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
110 struct inet_peer *peer;
111 117
112 base = inetpeer_base_ptr(rt->_rt6i_peer); 118 rt->dst.flags |= DST_NOCACHE;
113 if (!base) 119 rt->rt6i_uncached_list = ul;
114 return; 120
121 spin_lock_bh(&ul->lock);
122 list_add_tail(&rt->rt6i_uncached, &ul->head);
123 spin_unlock_bh(&ul->lock);
124}
115 125
116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); 126static void rt6_uncached_list_del(struct rt6_info *rt)
117 if (peer) { 127{
118 if (!rt6_set_peer(rt, peer)) 128 if (!list_empty(&rt->rt6i_uncached)) {
119 inet_putpeer(peer); 129 struct uncached_list *ul = rt->rt6i_uncached_list;
130
131 spin_lock_bh(&ul->lock);
132 list_del(&rt->rt6i_uncached);
133 spin_unlock_bh(&ul->lock);
120 } 134 }
121} 135}
122 136
123static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) 137static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
124{ 138{
125 if (rt6_has_peer(rt)) 139 struct net_device *loopback_dev = net->loopback_dev;
126 return rt6_peer_ptr(rt); 140 int cpu;
141
142 for_each_possible_cpu(cpu) {
143 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
144 struct rt6_info *rt;
145
146 spin_lock_bh(&ul->lock);
147 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
148 struct inet6_dev *rt_idev = rt->rt6i_idev;
149 struct net_device *rt_dev = rt->dst.dev;
150
151 if (rt_idev && (rt_idev->dev == dev || !dev) &&
152 rt_idev->dev != loopback_dev) {
153 rt->rt6i_idev = in6_dev_get(loopback_dev);
154 in6_dev_put(rt_idev);
155 }
127 156
128 rt6_bind_peer(rt, create); 157 if (rt_dev && (rt_dev == dev || !dev) &&
129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); 158 rt_dev != loopback_dev) {
159 rt->dst.dev = loopback_dev;
160 dev_hold(rt->dst.dev);
161 dev_put(rt_dev);
162 }
163 }
164 spin_unlock_bh(&ul->lock);
165 }
130} 166}
131 167
132static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) 168static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
133{ 169{
134 return __rt6_get_peer(rt, 1); 170 return dst_metrics_write_ptr(rt->dst.from);
135} 171}
136 172
137static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 173static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138{ 174{
139 struct rt6_info *rt = (struct rt6_info *) dst; 175 struct rt6_info *rt = (struct rt6_info *)dst;
140 struct inet_peer *peer;
141 u32 *p = NULL;
142 176
143 if (!(rt->dst.flags & DST_HOST)) 177 if (rt->rt6i_flags & RTF_PCPU)
178 return rt6_pcpu_cow_metrics(rt);
179 else if (rt->rt6i_flags & RTF_CACHE)
180 return NULL;
181 else
144 return dst_cow_metrics_generic(dst, old); 182 return dst_cow_metrics_generic(dst, old);
145
146 peer = rt6_get_peer_create(rt);
147 if (peer) {
148 u32 *old_p = __DST_METRICS_PTR(old);
149 unsigned long prev, new;
150
151 p = peer->metrics;
152 if (inet_metrics_new(peer) ||
153 (old & DST_METRICS_FORCE_OVERWRITE))
154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156 new = (unsigned long) p;
157 prev = cmpxchg(&dst->_metrics, old, new);
158
159 if (prev != old) {
160 p = __DST_METRICS_PTR(prev);
161 if (prev & DST_METRICS_READ_ONLY)
162 p = NULL;
163 }
164 }
165 return p;
166} 183}
167 184
168static inline const void *choose_neigh_daddr(struct rt6_info *rt, 185static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@@ -299,10 +316,10 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
299#endif 316#endif
300 317
301/* allocate dst with ip6_dst_ops */ 318/* allocate dst with ip6_dst_ops */
302static inline struct rt6_info *ip6_dst_alloc(struct net *net, 319static struct rt6_info *__ip6_dst_alloc(struct net *net,
303 struct net_device *dev, 320 struct net_device *dev,
304 int flags, 321 int flags,
305 struct fib6_table *table) 322 struct fib6_table *table)
306{ 323{
307 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 324 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
308 0, DST_OBSOLETE_FORCE_CHK, flags); 325 0, DST_OBSOLETE_FORCE_CHK, flags);
@@ -311,21 +328,51 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
311 struct dst_entry *dst = &rt->dst; 328 struct dst_entry *dst = &rt->dst;
312 329
313 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 330 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
314 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
315 INIT_LIST_HEAD(&rt->rt6i_siblings); 331 INIT_LIST_HEAD(&rt->rt6i_siblings);
332 INIT_LIST_HEAD(&rt->rt6i_uncached);
333 }
334 return rt;
335}
336
337static struct rt6_info *ip6_dst_alloc(struct net *net,
338 struct net_device *dev,
339 int flags,
340 struct fib6_table *table)
341{
342 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
343
344 if (rt) {
345 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
346 if (rt->rt6i_pcpu) {
347 int cpu;
348
349 for_each_possible_cpu(cpu) {
350 struct rt6_info **p;
351
352 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
353 /* no one shares rt */
354 *p = NULL;
355 }
356 } else {
357 dst_destroy((struct dst_entry *)rt);
358 return NULL;
359 }
316 } 360 }
361
317 return rt; 362 return rt;
318} 363}
319 364
320static void ip6_dst_destroy(struct dst_entry *dst) 365static void ip6_dst_destroy(struct dst_entry *dst)
321{ 366{
322 struct rt6_info *rt = (struct rt6_info *)dst; 367 struct rt6_info *rt = (struct rt6_info *)dst;
323 struct inet6_dev *idev = rt->rt6i_idev;
324 struct dst_entry *from = dst->from; 368 struct dst_entry *from = dst->from;
369 struct inet6_dev *idev;
325 370
326 if (!(rt->dst.flags & DST_HOST)) 371 dst_destroy_metrics_generic(dst);
327 dst_destroy_metrics_generic(dst); 372 free_percpu(rt->rt6i_pcpu);
373 rt6_uncached_list_del(rt);
328 374
375 idev = rt->rt6i_idev;
329 if (idev) { 376 if (idev) {
330 rt->rt6i_idev = NULL; 377 rt->rt6i_idev = NULL;
331 in6_dev_put(idev); 378 in6_dev_put(idev);
@@ -333,11 +380,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
333 380
334 dst->from = NULL; 381 dst->from = NULL;
335 dst_release(from); 382 dst_release(from);
336
337 if (rt6_has_peer(rt)) {
338 struct inet_peer *peer = rt6_peer_ptr(rt);
339 inet_putpeer(peer);
340 }
341} 383}
342 384
343static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 385static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -652,15 +694,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
652 u32 metric, int oif, int strict, 694 u32 metric, int oif, int strict,
653 bool *do_rr) 695 bool *do_rr)
654{ 696{
655 struct rt6_info *rt, *match; 697 struct rt6_info *rt, *match, *cont;
656 int mpri = -1; 698 int mpri = -1;
657 699
658 match = NULL; 700 match = NULL;
659 for (rt = rr_head; rt && rt->rt6i_metric == metric; 701 cont = NULL;
660 rt = rt->dst.rt6_next) 702 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
703 if (rt->rt6i_metric != metric) {
704 cont = rt;
705 break;
706 }
707
661 match = find_match(rt, oif, strict, &mpri, match, do_rr); 708 match = find_match(rt, oif, strict, &mpri, match, do_rr);
662 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 709 }
663 rt = rt->dst.rt6_next) 710
711 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
712 if (rt->rt6i_metric != metric) {
713 cont = rt;
714 break;
715 }
716
717 match = find_match(rt, oif, strict, &mpri, match, do_rr);
718 }
719
720 if (match || !cont)
721 return match;
722
723 for (rt = cont; rt; rt = rt->dst.rt6_next)
664 match = find_match(rt, oif, strict, &mpri, match, do_rr); 724 match = find_match(rt, oif, strict, &mpri, match, do_rr);
665 725
666 return match; 726 return match;
@@ -694,6 +754,11 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
694 return match ? match : net->ipv6.ip6_null_entry; 754 return match ? match : net->ipv6.ip6_null_entry;
695} 755}
696 756
757static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
758{
759 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
760}
761
697#ifdef CONFIG_IPV6_ROUTE_INFO 762#ifdef CONFIG_IPV6_ROUTE_INFO
698int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 763int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
699 const struct in6_addr *gwaddr) 764 const struct in6_addr *gwaddr)
@@ -872,9 +937,9 @@ int ip6_ins_rt(struct rt6_info *rt)
872 return __ip6_ins_rt(rt, &info, &mxc); 937 return __ip6_ins_rt(rt, &info, &mxc);
873} 938}
874 939
875static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, 940static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
876 const struct in6_addr *daddr, 941 const struct in6_addr *daddr,
877 const struct in6_addr *saddr) 942 const struct in6_addr *saddr)
878{ 943{
879 struct rt6_info *rt; 944 struct rt6_info *rt;
880 945
@@ -882,15 +947,26 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
882 * Clone the route. 947 * Clone the route.
883 */ 948 */
884 949
885 rt = ip6_rt_copy(ort, daddr); 950 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
951 ort = (struct rt6_info *)ort->dst.from;
886 952
887 if (rt) { 953 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
954 0, ort->rt6i_table);
955
956 if (!rt)
957 return NULL;
958
959 ip6_rt_copy_init(rt, ort);
960 rt->rt6i_flags |= RTF_CACHE;
961 rt->rt6i_metric = 0;
962 rt->dst.flags |= DST_HOST;
963 rt->rt6i_dst.addr = *daddr;
964 rt->rt6i_dst.plen = 128;
965
966 if (!rt6_is_gw_or_nonexthop(ort)) {
888 if (ort->rt6i_dst.plen != 128 && 967 if (ort->rt6i_dst.plen != 128 &&
889 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 968 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
890 rt->rt6i_flags |= RTF_ANYCAST; 969 rt->rt6i_flags |= RTF_ANYCAST;
891
892 rt->rt6i_flags |= RTF_CACHE;
893
894#ifdef CONFIG_IPV6_SUBTREES 970#ifdef CONFIG_IPV6_SUBTREES
895 if (rt->rt6i_src.plen && saddr) { 971 if (rt->rt6i_src.plen && saddr) {
896 rt->rt6i_src.addr = *saddr; 972 rt->rt6i_src.addr = *saddr;
@@ -902,30 +978,65 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
902 return rt; 978 return rt;
903} 979}
904 980
905static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, 981static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
906 const struct in6_addr *daddr)
907{ 982{
908 struct rt6_info *rt = ip6_rt_copy(ort, daddr); 983 struct rt6_info *pcpu_rt;
909 984
910 if (rt) 985 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
911 rt->rt6i_flags |= RTF_CACHE; 986 rt->dst.dev, rt->dst.flags,
912 return rt; 987 rt->rt6i_table);
988
989 if (!pcpu_rt)
990 return NULL;
991 ip6_rt_copy_init(pcpu_rt, rt);
992 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
993 pcpu_rt->rt6i_flags |= RTF_PCPU;
994 return pcpu_rt;
995}
996
997/* It should be called with read_lock_bh(&tb6_lock) acquired */
998static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
999{
1000 struct rt6_info *pcpu_rt, *prev, **p;
1001
1002 p = this_cpu_ptr(rt->rt6i_pcpu);
1003 pcpu_rt = *p;
1004
1005 if (pcpu_rt)
1006 goto done;
1007
1008 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1009 if (!pcpu_rt) {
1010 struct net *net = dev_net(rt->dst.dev);
1011
1012 pcpu_rt = net->ipv6.ip6_null_entry;
1013 goto done;
1014 }
1015
1016 prev = cmpxchg(p, NULL, pcpu_rt);
1017 if (prev) {
1018 /* If someone did it before us, return prev instead */
1019 dst_destroy(&pcpu_rt->dst);
1020 pcpu_rt = prev;
1021 }
1022
1023done:
1024 dst_hold(&pcpu_rt->dst);
1025 rt6_dst_from_metrics_check(pcpu_rt);
1026 return pcpu_rt;
913} 1027}
914 1028
915static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 1029static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
916 struct flowi6 *fl6, int flags) 1030 struct flowi6 *fl6, int flags)
917{ 1031{
918 struct fib6_node *fn, *saved_fn; 1032 struct fib6_node *fn, *saved_fn;
919 struct rt6_info *rt, *nrt; 1033 struct rt6_info *rt;
920 int strict = 0; 1034 int strict = 0;
921 int attempts = 3;
922 int err;
923 1035
924 strict |= flags & RT6_LOOKUP_F_IFACE; 1036 strict |= flags & RT6_LOOKUP_F_IFACE;
925 if (net->ipv6.devconf_all->forwarding == 0) 1037 if (net->ipv6.devconf_all->forwarding == 0)
926 strict |= RT6_LOOKUP_F_REACHABLE; 1038 strict |= RT6_LOOKUP_F_REACHABLE;
927 1039
928redo_fib6_lookup_lock:
929 read_lock_bh(&table->tb6_lock); 1040 read_lock_bh(&table->tb6_lock);
930 1041
931 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1042 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
@@ -944,51 +1055,52 @@ redo_rt6_select:
944 strict &= ~RT6_LOOKUP_F_REACHABLE; 1055 strict &= ~RT6_LOOKUP_F_REACHABLE;
945 fn = saved_fn; 1056 fn = saved_fn;
946 goto redo_rt6_select; 1057 goto redo_rt6_select;
947 } else {
948 dst_hold(&rt->dst);
949 read_unlock_bh(&table->tb6_lock);
950 goto out2;
951 } 1058 }
952 } 1059 }
953 1060
954 dst_hold(&rt->dst);
955 read_unlock_bh(&table->tb6_lock);
956 1061
957 if (rt->rt6i_flags & RTF_CACHE) 1062 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
958 goto out2; 1063 dst_use(&rt->dst, jiffies);
1064 read_unlock_bh(&table->tb6_lock);
959 1065
960 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) 1066 rt6_dst_from_metrics_check(rt);
961 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 1067 return rt;
962 else if (!(rt->dst.flags & DST_HOST)) 1068 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
963 nrt = rt6_alloc_clone(rt, &fl6->daddr); 1069 !(rt->rt6i_flags & RTF_GATEWAY))) {
964 else 1070 /* Create a RTF_CACHE clone which will not be
965 goto out2; 1071 * owned by the fib6 tree. It is for the special case where
1072 * the daddr in the skb during the neighbor look-up is different
1073 * from the fl6->daddr used to look-up route here.
1074 */
966 1075
967 ip6_rt_put(rt); 1076 struct rt6_info *uncached_rt;
968 rt = nrt ? : net->ipv6.ip6_null_entry;
969 1077
970 dst_hold(&rt->dst); 1078 dst_use(&rt->dst, jiffies);
971 if (nrt) { 1079 read_unlock_bh(&table->tb6_lock);
972 err = ip6_ins_rt(nrt);
973 if (!err)
974 goto out2;
975 }
976 1080
977 if (--attempts <= 0) 1081 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
978 goto out2; 1082 dst_release(&rt->dst);
979 1083
980 /* 1084 if (uncached_rt)
981 * Race condition! In the gap, when table->tb6_lock was 1085 rt6_uncached_list_add(uncached_rt);
982 * released someone could insert this route. Relookup. 1086 else
983 */ 1087 uncached_rt = net->ipv6.ip6_null_entry;
984 ip6_rt_put(rt);
985 goto redo_fib6_lookup_lock;
986 1088
987out2: 1089 dst_hold(&uncached_rt->dst);
988 rt->dst.lastuse = jiffies; 1090 return uncached_rt;
989 rt->dst.__use++;
990 1091
991 return rt; 1092 } else {
1093 /* Get a percpu copy */
1094
1095 struct rt6_info *pcpu_rt;
1096
1097 rt->dst.lastuse = jiffies;
1098 rt->dst.__use++;
1099 pcpu_rt = rt6_get_pcpu_route(rt);
1100 read_unlock_bh(&table->tb6_lock);
1101
1102 return pcpu_rt;
1103 }
992} 1104}
993 1105
994static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 1106static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
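
Two points are worth drawing out of the ip6_pol_route() rewrite above. First, the FLOWI_FLAG_KNOWN_NH case builds an RTF_CACHE clone that the fib6 tree never owns; it is parked on the per-cpu rt6_uncached_list so that rt6_ifdown() (via rt6_uncached_list_flush_dev()) can still re-home it to the loopback device when its device disappears. Second, rt6_get_pcpu_route() publishes its per-cpu clone with the usual allocate-then-cmpxchg idiom; a sketch with the null-entry fallback stripped:

    struct rt6_info *pcpu_rt, *prev, **p;

    p = this_cpu_ptr(rt->rt6i_pcpu);    /* one clone slot per CPU */
    pcpu_rt = ip6_rt_pcpu_alloc(rt);    /* speculative allocation */
    prev = cmpxchg(p, NULL, pcpu_rt);   /* publish only if slot still empty */
    if (prev) {
        dst_destroy(&pcpu_rt->dst);     /* lost the race: drop our copy */
        pcpu_rt = prev;                 /* and reuse the winner's clone */
    }
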
@@ -1059,7 +1171,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1059 new = &rt->dst; 1171 new = &rt->dst;
1060 1172
1061 memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); 1173 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1062 rt6_init_peer(rt, net->ipv6.peers);
1063 1174
1064 new->__use = 1; 1175 new->__use = 1;
1065 new->input = dst_discard; 1176 new->input = dst_discard;
@@ -1093,6 +1204,33 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1093 * Destination cache support functions 1204 * Destination cache support functions
1094 */ 1205 */
1095 1206
1207static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1208{
1209 if (rt->dst.from &&
1210 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1211 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1212}
1213
1214static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1215{
1216 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1217 return NULL;
1218
1219 if (rt6_check_expired(rt))
1220 return NULL;
1221
1222 return &rt->dst;
1223}
1224
1225static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1226{
1227 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1228 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1229 return &rt->dst;
1230 else
1231 return NULL;
1232}
1233
1096static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 1234static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1097{ 1235{
1098 struct rt6_info *rt; 1236 struct rt6_info *rt;
@@ -1103,13 +1241,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1103 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1241 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1104 * into this function always. 1242 * into this function always.
1105 */ 1243 */
1106 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1107 return NULL;
1108 1244
1109 if (rt6_check_expired(rt)) 1245 rt6_dst_from_metrics_check(rt);
1110 return NULL;
1111 1246
1112 return dst; 1247 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1248 return rt6_dst_from_check(rt, cookie);
1249 else
1250 return rt6_check(rt, cookie);
1113} 1251}
1114 1252
1115static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 1253static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -1148,24 +1286,63 @@ static void ip6_link_failure(struct sk_buff *skb)
1148 } 1286 }
1149} 1287}
1150 1288
1151static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1289static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1152 struct sk_buff *skb, u32 mtu) 1290{
1291 struct net *net = dev_net(rt->dst.dev);
1292
1293 rt->rt6i_flags |= RTF_MODIFIED;
1294 rt->rt6i_pmtu = mtu;
1295 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1296}
1297
1298static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1299 const struct ipv6hdr *iph, u32 mtu)
1153{ 1300{
1154 struct rt6_info *rt6 = (struct rt6_info *)dst; 1301 struct rt6_info *rt6 = (struct rt6_info *)dst;
1155 1302
1156 dst_confirm(dst); 1303 if (rt6->rt6i_flags & RTF_LOCAL)
1157 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1304 return;
1158 struct net *net = dev_net(dst->dev);
1159 1305
1160 rt6->rt6i_flags |= RTF_MODIFIED; 1306 dst_confirm(dst);
1161 if (mtu < IPV6_MIN_MTU) 1307 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1162 mtu = IPV6_MIN_MTU; 1308 if (mtu >= dst_mtu(dst))
1309 return;
1163 1310
1164 dst_metric_set(dst, RTAX_MTU, mtu); 1311 if (rt6->rt6i_flags & RTF_CACHE) {
1165 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); 1312 rt6_do_update_pmtu(rt6, mtu);
1313 } else {
1314 const struct in6_addr *daddr, *saddr;
1315 struct rt6_info *nrt6;
1316
1317 if (iph) {
1318 daddr = &iph->daddr;
1319 saddr = &iph->saddr;
1320 } else if (sk) {
1321 daddr = &sk->sk_v6_daddr;
1322 saddr = &inet6_sk(sk)->saddr;
1323 } else {
1324 return;
1325 }
1326 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1327 if (nrt6) {
1328 rt6_do_update_pmtu(nrt6, mtu);
1329
1330 /* ip6_ins_rt(nrt6) will bump the
1331 * rt6->rt6i_node->fn_sernum
1332 * which will fail the next rt6_check() and
1333 * invalidate the sk->sk_dst_cache.
1334 */
1335 ip6_ins_rt(nrt6);
1336 }
1166 } 1337 }
1167} 1338}
1168 1339
1340static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1341 struct sk_buff *skb, u32 mtu)
1342{
1343 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1344}
1345
1169void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 1346void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1170 int oif, u32 mark) 1347 int oif, u32 mark)
1171{ 1348{
@@ -1182,7 +1359,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1182 1359
1183 dst = ip6_route_output(net, NULL, &fl6); 1360 dst = ip6_route_output(net, NULL, &fl6);
1184 if (!dst->error) 1361 if (!dst->error)
1185 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); 1362 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1186 dst_release(dst); 1363 dst_release(dst);
1187} 1364}
1188EXPORT_SYMBOL_GPL(ip6_update_pmtu); 1365EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1341,12 +1518,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1341 1518
1342static unsigned int ip6_mtu(const struct dst_entry *dst) 1519static unsigned int ip6_mtu(const struct dst_entry *dst)
1343{ 1520{
1521 const struct rt6_info *rt = (const struct rt6_info *)dst;
1522 unsigned int mtu = rt->rt6i_pmtu;
1344 struct inet6_dev *idev; 1523 struct inet6_dev *idev;
1345 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1346 1524
1347 if (mtu) 1525 if (mtu)
1348 goto out; 1526 goto out;
1349 1527
1528 mtu = dst_metric_raw(dst, RTAX_MTU);
1529 if (mtu)
1530 goto out;
1531
1350 mtu = IPV6_MIN_MTU; 1532 mtu = IPV6_MIN_MTU;
1351 1533
1352 rcu_read_lock(); 1534 rcu_read_lock();
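
This hunk, together with the rt6_mtu_change_route() and rt6_fill_node() changes below, moves the learned path MTU out of the shared RTAX_MTU metric and into rt6i_pmtu on a private RTF_CACHE clone, so a PMTU event on one flow no longer shrinks the MTU seen by every user of the covering route. The lookup preference in ip6_mtu() condenses to the sketch below (the real function goes on to consult the device's mtu6 under RCU rather than stopping at the floor):

    static unsigned int example_ip6_mtu(const struct rt6_info *rt)
    {
        unsigned int mtu = rt->rt6i_pmtu;    /* per-flow learned PMTU */

        if (!mtu)
            mtu = dst_metric_raw(&rt->dst, RTAX_MTU);    /* explicit metric */
        if (!mtu)
            mtu = IPV6_MIN_MTU;              /* 1280-byte floor */
        return mtu;
    }
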
@@ -1590,10 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg)
1590 1772
1591 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1773 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1592 rt->rt6i_dst.plen = cfg->fc_dst_len; 1774 rt->rt6i_dst.plen = cfg->fc_dst_len;
1593 if (rt->rt6i_dst.plen == 128) { 1775 if (rt->rt6i_dst.plen == 128)
1594 rt->dst.flags |= DST_HOST; 1776 rt->dst.flags |= DST_HOST;
1595 dst_metrics_set_force_overwrite(&rt->dst);
1596 }
1597 1777
1598#ifdef CONFIG_IPV6_SUBTREES 1778#ifdef CONFIG_IPV6_SUBTREES
1599 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1779 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1651,6 +1831,16 @@ int ip6_route_add(struct fib6_config *cfg)
1651 int gwa_type; 1831 int gwa_type;
1652 1832
1653 gw_addr = &cfg->fc_gateway; 1833 gw_addr = &cfg->fc_gateway;
1834
1835 /* if gw_addr is local we will fail to detect this in case
1836 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1837 * will return already-added prefix route via interface that
1838 * prefix route was assigned to, which might be non-loopback.
1839 */
1840 err = -EINVAL;
1841 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1842 goto out;
1843
1654 rt->rt6i_gateway = *gw_addr; 1844 rt->rt6i_gateway = *gw_addr;
1655 gwa_type = ipv6_addr_type(gw_addr); 1845 gwa_type = ipv6_addr_type(gw_addr);
1656 1846
@@ -1664,7 +1854,6 @@ int ip6_route_add(struct fib6_config *cfg)
1664 (SIT, PtP, NBMA NOARP links) it is handy to allow 1854 (SIT, PtP, NBMA NOARP links) it is handy to allow
1665 some exceptions. --ANK 1855 some exceptions. --ANK
1666 */ 1856 */
1667 err = -EINVAL;
1668 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1857 if (!(gwa_type & IPV6_ADDR_UNICAST))
1669 goto out; 1858 goto out;
1670 1859
@@ -1785,6 +1974,9 @@ static int ip6_route_del(struct fib6_config *cfg)
1785 1974
1786 if (fn) { 1975 if (fn) {
1787 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1976 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1977 if ((rt->rt6i_flags & RTF_CACHE) &&
1978 !(cfg->fc_flags & RTF_CACHE))
1979 continue;
1788 if (cfg->fc_ifindex && 1980 if (cfg->fc_ifindex &&
1789 (!rt->dst.dev || 1981 (!rt->dst.dev ||
1790 rt->dst.dev->ifindex != cfg->fc_ifindex)) 1982 rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -1894,7 +2086,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
1894 NEIGH_UPDATE_F_ISROUTER)) 2086 NEIGH_UPDATE_F_ISROUTER))
1895 ); 2087 );
1896 2088
1897 nrt = ip6_rt_copy(rt, &msg->dest); 2089 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
1898 if (!nrt) 2090 if (!nrt)
1899 goto out; 2091 goto out;
1900 2092
@@ -1926,42 +2118,35 @@ out:
1926 * Misc support functions 2118 * Misc support functions
1927 */ 2119 */
1928 2120
1929static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 2121static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1930 const struct in6_addr *dest)
1931{ 2122{
1932 struct net *net = dev_net(ort->dst.dev); 2123 BUG_ON(from->dst.from);
1933 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1934 ort->rt6i_table);
1935 2124
1936 if (rt) { 2125 rt->rt6i_flags &= ~RTF_EXPIRES;
1937 rt->dst.input = ort->dst.input; 2126 dst_hold(&from->dst);
1938 rt->dst.output = ort->dst.output; 2127 rt->dst.from = &from->dst;
1939 rt->dst.flags |= DST_HOST; 2128 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1940 2129}
1941 rt->rt6i_dst.addr = *dest;
1942 rt->rt6i_dst.plen = 128;
1943 dst_copy_metrics(&rt->dst, &ort->dst);
1944 rt->dst.error = ort->dst.error;
1945 rt->rt6i_idev = ort->rt6i_idev;
1946 if (rt->rt6i_idev)
1947 in6_dev_hold(rt->rt6i_idev);
1948 rt->dst.lastuse = jiffies;
1949
1950 if (ort->rt6i_flags & RTF_GATEWAY)
1951 rt->rt6i_gateway = ort->rt6i_gateway;
1952 else
1953 rt->rt6i_gateway = *dest;
1954 rt->rt6i_flags = ort->rt6i_flags;
1955 rt6_set_from(rt, ort);
1956 rt->rt6i_metric = 0;
1957 2130
2131static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2132{
2133 rt->dst.input = ort->dst.input;
2134 rt->dst.output = ort->dst.output;
2135 rt->rt6i_dst = ort->rt6i_dst;
2136 rt->dst.error = ort->dst.error;
2137 rt->rt6i_idev = ort->rt6i_idev;
2138 if (rt->rt6i_idev)
2139 in6_dev_hold(rt->rt6i_idev);
2140 rt->dst.lastuse = jiffies;
2141 rt->rt6i_gateway = ort->rt6i_gateway;
2142 rt->rt6i_flags = ort->rt6i_flags;
2143 rt6_set_from(rt, ort);
2144 rt->rt6i_metric = ort->rt6i_metric;
1958#ifdef CONFIG_IPV6_SUBTREES 2145#ifdef CONFIG_IPV6_SUBTREES
1959 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 2146 rt->rt6i_src = ort->rt6i_src;
1960#endif 2147#endif
1961 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); 2148 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1962 rt->rt6i_table = ort->rt6i_table; 2149 rt->rt6i_table = ort->rt6i_table;
1963 }
1964 return rt;
1965} 2150}
1966 2151
1967#ifdef CONFIG_IPV6_ROUTE_INFO 2152#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -2336,6 +2521,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
2336 2521
2337 fib6_clean_all(net, fib6_ifdown, &adn); 2522 fib6_clean_all(net, fib6_ifdown, &adn);
2338 icmp6_clean_all(fib6_ifdown, &adn); 2523 icmp6_clean_all(fib6_ifdown, &adn);
2524 rt6_uncached_list_flush_dev(net, dev);
2339} 2525}
2340 2526
2341struct rt6_mtu_change_arg { 2527struct rt6_mtu_change_arg {
@@ -2373,11 +2559,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2373 PMTU discouvery. 2559 PMTU discouvery.
2374 */ 2560 */
2375 if (rt->dst.dev == arg->dev && 2561 if (rt->dst.dev == arg->dev &&
2376 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2562 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2377 (dst_mtu(&rt->dst) >= arg->mtu || 2563 if (rt->rt6i_flags & RTF_CACHE) {
2378 (dst_mtu(&rt->dst) < arg->mtu && 2564 /* For RTF_CACHE with rt6i_pmtu == 0
2379 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2565 * (i.e. a redirected route),
2380 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2566 * the metrics of its rt->dst.from has already
2567 * been updated.
2568 */
2569 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2570 rt->rt6i_pmtu = arg->mtu;
2571 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2572 (dst_mtu(&rt->dst) < arg->mtu &&
2573 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2574 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2575 }
2381 } 2576 }
2382 return 0; 2577 return 0;
2383} 2578}
@@ -2434,6 +2629,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2434 if (rtm->rtm_type == RTN_LOCAL) 2629 if (rtm->rtm_type == RTN_LOCAL)
2435 cfg->fc_flags |= RTF_LOCAL; 2630 cfg->fc_flags |= RTF_LOCAL;
2436 2631
2632 if (rtm->rtm_flags & RTM_F_CLONED)
2633 cfg->fc_flags |= RTF_CACHE;
2634
2437 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2635 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2438 cfg->fc_nlinfo.nlh = nlh; 2636 cfg->fc_nlinfo.nlh = nlh;
2439 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2637 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2608,6 +2806,7 @@ static int rt6_fill_node(struct net *net,
2608 int iif, int type, u32 portid, u32 seq, 2806 int iif, int type, u32 portid, u32 seq,
2609 int prefix, int nowait, unsigned int flags) 2807 int prefix, int nowait, unsigned int flags)
2610{ 2808{
2809 u32 metrics[RTAX_MAX];
2611 struct rtmsg *rtm; 2810 struct rtmsg *rtm;
2612 struct nlmsghdr *nlh; 2811 struct nlmsghdr *nlh;
2613 long expires; 2812 long expires;
@@ -2721,7 +2920,10 @@ static int rt6_fill_node(struct net *net,
2721 goto nla_put_failure; 2920 goto nla_put_failure;
2722 } 2921 }
2723 2922
2724 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2923 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2924 if (rt->rt6i_pmtu)
2925 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2926 if (rtnetlink_put_metrics(skb, metrics) < 0)
2725 goto nla_put_failure; 2927 goto nla_put_failure;
2726 2928
2727 if (rt->rt6i_flags & RTF_GATEWAY) { 2929 if (rt->rt6i_flags & RTF_GATEWAY) {
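rt6_fill_node() now copies the shared metrics array and overlays the per-route cached PMTU before handing it to rtnetlink, so a clone's rt6i_pmtu is what userspace sees rather than the inherited dst metric. A small sketch of the overlay, assuming only that metric ids are 1-based as in rtnetlink:

#include <stdio.h>
#include <string.h>

#define RTAX_MTU 2      /* 1-based metric id, as in rtnetlink */
#define RTAX_MAX 16

/* Copy shared metrics, then overlay the per-route cached PMTU. */
static void fill_metrics(unsigned int *out, const unsigned int *shared,
			 unsigned int pmtu)
{
	memcpy(out, shared, RTAX_MAX * sizeof(*out));
	if (pmtu)
		out[RTAX_MTU - 1] = pmtu;   /* metric ids are 1-based */
}

int main(void)
{
	unsigned int shared[RTAX_MAX] = { [RTAX_MTU - 1] = 1500 };
	unsigned int out[RTAX_MAX];

	fill_metrics(out, shared, 1280);
	printf("reported mtu=%u\n", out[RTAX_MTU - 1]);
	return 0;
}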
@@ -3216,6 +3418,7 @@ static struct notifier_block ip6_route_dev_notifier = {
3216int __init ip6_route_init(void) 3418int __init ip6_route_init(void)
3217{ 3419{
3218 int ret; 3420 int ret;
3421 int cpu;
3219 3422
3220 ret = -ENOMEM; 3423 ret = -ENOMEM;
3221 ip6_dst_ops_template.kmem_cachep = 3424 ip6_dst_ops_template.kmem_cachep =
@@ -3275,6 +3478,13 @@ int __init ip6_route_init(void)
3275 if (ret) 3478 if (ret)
3276 goto out_register_late_subsys; 3479 goto out_register_late_subsys;
3277 3480
3481 for_each_possible_cpu(cpu) {
3482 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3483
3484 INIT_LIST_HEAD(&ul->head);
3485 spin_lock_init(&ul->lock);
3486 }
3487
3278out: 3488out:
3279 return ret; 3489 return ret;
3280 3490
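ip6_route_init() now sets up one uncached-route list plus lock per possible CPU. A userspace sketch of the same shape, with a fixed array standing in for per-CPU storage and a mutex standing in for the spinlock:

#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

struct list_head { struct list_head *next, *prev; };

struct uncached_list {
	struct list_head head;
	pthread_mutex_t lock;     /* stands in for spinlock_t */
};

static struct uncached_list rt6_uncached_list[NR_CPUS];

static void init_list_head(struct list_head *h)
{
	h->next = h;
	h->prev = h;
}

int main(void)
{
	/* One empty list plus lock per CPU, as in ip6_route_init(). */
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		init_list_head(&rt6_uncached_list[cpu].head);
		pthread_mutex_init(&rt6_uncached_list[cpu].lock, NULL);
	}
	printf("initialised %d per-cpu lists\n", NR_CPUS);
	return 0;
}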
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 21bc2eb53c57..0909f4e0d53c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -41,23 +41,6 @@ static __u16 const msstab[] = {
41 9000 - 60, 41 9000 - 60,
42}; 42};
43 43
44static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
45 struct request_sock *req,
46 struct dst_entry *dst)
47{
48 struct inet_connection_sock *icsk = inet_csk(sk);
49 struct sock *child;
50
51 child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
52 if (child) {
53 atomic_set(&req->rsk_refcnt, 1);
54 inet_csk_reqsk_queue_add(sk, req, child);
55 } else {
56 reqsk_free(req);
57 }
58 return child;
59}
60
61static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], 44static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
62 ipv6_cookie_scratch); 45 ipv6_cookie_scratch);
63 46
@@ -264,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
264 ireq->rcv_wscale = rcv_wscale; 247 ireq->rcv_wscale = rcv_wscale;
265 ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); 248 ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
266 249
267 ret = get_cookie_sock(sk, skb, req, dst); 250 ret = tcp_get_cookie_sock(sk, skb, req, dst);
268out: 251out:
269 return ret; 252 return ret;
270out_free: 253out_free:
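The IPv6 copy of get_cookie_sock() is dropped in favour of a shared tcp_get_cookie_sock(); judging from the removed body, the helper attaches the accepted child to the listener's queue with one request reference, or frees the request on failure. A reduced sketch of that consolidation, with stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct sock { int dummy; };
struct request_sock { int rsk_refcnt; };

/* Stand-ins for the per-family pieces. */
static struct sock *syn_recv_sock(struct sock *sk, struct request_sock *req)
{
	(void)sk; (void)req;
	return calloc(1, sizeof(struct sock)); /* child socket */
}

static void reqsk_free(struct request_sock *req) { free(req); }

/* One shared helper instead of an IPv4 and an IPv6 copy. */
static struct sock *tcp_get_cookie_sock_sketch(struct sock *sk,
					       struct request_sock *req)
{
	struct sock *child = syn_recv_sock(sk, req);

	if (child)
		req->rsk_refcnt = 1;  /* queue now owns this reference */
	else
		reqsk_free(req);
	return child;
}

int main(void)
{
	struct sock listener = { 0 };
	struct request_sock *req = calloc(1, sizeof(*req));
	struct sock *child = tcp_get_cookie_sock_sketch(&listener, req);

	printf("child %screated\n", child ? "" : "not ");
	if (child)
		free(req);
	free(child);
	return 0;
}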
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index abcc79f649b3..4e705add4f18 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = {
68 .mode = 0644, 68 .mode = 0644,
69 .proc_handler = proc_dointvec_jiffies, 69 .proc_handler = proc_dointvec_jiffies,
70 }, 70 },
71 {
72 .procname = "flowlabel_state_ranges",
73 .data = &init_net.ipv6.sysctl.flowlabel_state_ranges,
74 .maxlen = sizeof(int),
75 .mode = 0644,
76 .proc_handler = proc_dointvec
77 },
71 { } 78 { }
72}; 79};
73 80
@@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
109 ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; 116 ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
110 ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; 117 ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
111 ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; 118 ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
119 ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
112 120
113 ipv6_route_table = ipv6_route_sysctl_init(net); 121 ipv6_route_table = ipv6_route_sysctl_init(net);
114 if (!ipv6_route_table) 122 if (!ipv6_route_table)
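The new flowlabel_state_ranges knob follows the existing pattern: a template entry plus an index-based fixup in ipv6_sysctl_net_init() that points slot 7 at the namespace's own field. A sketch of that wiring (the index coupling is why both hunks must change together):

#include <stdio.h>

struct ctl_entry {
	const char *procname;
	int *data;
};

struct netns_ipv6 { int flowlabel_state_ranges; };

/* Template shared by all namespaces; data pointers are patched per
 * namespace by slot index, mirroring ipv6_sysctl_net_init().
 */
static const struct ctl_entry ipv6_table_template[8] = {
	/* slots 0..6 elided */
	[7] = { "flowlabel_state_ranges", NULL },
};

int main(void)
{
	struct netns_ipv6 net = { .flowlabel_state_ranges = 1 };
	struct ctl_entry table[8];

	for (int i = 0; i < 8; i++)
		table[i] = ipv6_table_template[i];
	table[7].data = &net.flowlabel_state_ranges;  /* per-net fixup */

	printf("%s = %d\n", table[7].procname, *table[7].data);
	return 0;
}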
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3adffb300238..6748c4277aff 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -99,8 +99,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
99 dst_hold(dst); 99 dst_hold(dst);
100 sk->sk_rx_dst = dst; 100 sk->sk_rx_dst = dst;
101 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 101 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102 if (rt->rt6i_node) 102 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
103 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
104 } 103 }
105} 104}
106 105
@@ -121,7 +120,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
121 struct ipv6_pinfo *np = inet6_sk(sk); 120 struct ipv6_pinfo *np = inet6_sk(sk);
122 struct tcp_sock *tp = tcp_sk(sk); 121 struct tcp_sock *tp = tcp_sk(sk);
123 struct in6_addr *saddr = NULL, *final_p, final; 122 struct in6_addr *saddr = NULL, *final_p, final;
124 struct rt6_info *rt;
125 struct flowi6 fl6; 123 struct flowi6 fl6;
126 struct dst_entry *dst; 124 struct dst_entry *dst;
127 int addr_type; 125 int addr_type;
@@ -259,10 +257,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
259 sk->sk_gso_type = SKB_GSO_TCPV6; 257 sk->sk_gso_type = SKB_GSO_TCPV6;
260 __ip6_dst_store(sk, dst, NULL, NULL); 258 __ip6_dst_store(sk, dst, NULL, NULL);
261 259
262 rt = (struct rt6_info *) dst;
263 if (tcp_death_row.sysctl_tw_recycle && 260 if (tcp_death_row.sysctl_tw_recycle &&
264 !tp->rx_opt.ts_recent_stamp && 261 !tp->rx_opt.ts_recent_stamp &&
265 ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) 262 ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
266 tcp_fetch_timewait_stamp(sk, dst); 263 tcp_fetch_timewait_stamp(sk, dst);
267 264
268 icsk->icsk_ext_hdr_len = 0; 265 icsk->icsk_ext_hdr_len = 0;
@@ -1251,7 +1248,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1251 return 0; 1248 return 0;
1252 } 1249 }
1253 1250
1254 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 1251 if (tcp_checksum_complete(skb))
1255 goto csum_err; 1252 goto csum_err;
1256 1253
1257 if (sk->sk_state == TCP_LISTEN) { 1254 if (sk->sk_state == TCP_LISTEN) {
@@ -1421,6 +1418,7 @@ process:
1421 skb->dev = NULL; 1418 skb->dev = NULL;
1422 1419
1423 bh_lock_sock_nested(sk); 1420 bh_lock_sock_nested(sk);
1421 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
1424 ret = 0; 1422 ret = 0;
1425 if (!sock_owned_by_user(sk)) { 1423 if (!sock_owned_by_user(sk)) {
1426 if (!tcp_prequeue(sk, skb)) 1424 if (!tcp_prequeue(sk, skb))
@@ -1442,7 +1440,7 @@ no_tcp_socket:
1442 1440
1443 tcp_v6_fill_cb(skb, hdr, th); 1441 tcp_v6_fill_cb(skb, hdr, th);
1444 1442
1445 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { 1443 if (tcp_checksum_complete(skb)) {
1446csum_error: 1444csum_error:
1447 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 1445 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1448bad_packet: 1446bad_packet:
@@ -1467,10 +1465,6 @@ do_time_wait:
1467 1465
1468 tcp_v6_fill_cb(skb, hdr, th); 1466 tcp_v6_fill_cb(skb, hdr, th);
1469 1467
1470 if (skb->len < (th->doff<<2)) {
1471 inet_twsk_put(inet_twsk(sk));
1472 goto bad_packet;
1473 }
1474 if (tcp_checksum_complete(skb)) { 1468 if (tcp_checksum_complete(skb)) {
1475 inet_twsk_put(inet_twsk(sk)); 1469 inet_twsk_put(inet_twsk(sk));
1476 goto csum_error; 1470 goto csum_error;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f337a908a76a..ed0583c1b9fc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -71,20 +71,12 @@ static int xfrm6_get_tos(const struct flowi *fl)
71 return 0; 71 return 0;
72} 72}
73 73
74static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
75{
76 struct rt6_info *rt = (struct rt6_info *)xdst;
77
78 rt6_init_peer(rt, net->ipv6.peers);
79}
80
81static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, 74static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
82 int nfheader_len) 75 int nfheader_len)
83{ 76{
84 if (dst->ops->family == AF_INET6) { 77 if (dst->ops->family == AF_INET6) {
85 struct rt6_info *rt = (struct rt6_info *)dst; 78 struct rt6_info *rt = (struct rt6_info *)dst;
86 if (rt->rt6i_node) 79 path->path_cookie = rt6_get_cookie(rt);
87 path->path_cookie = rt->rt6i_node->fn_sernum;
88 } 80 }
89 81
90 path->u.rt6.rt6i_nfheader_len = nfheader_len; 82 path->u.rt6.rt6i_nfheader_len = nfheader_len;
@@ -106,16 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
106 return -ENODEV; 98 return -ENODEV;
107 } 99 }
108 100
109 rt6_transfer_peer(&xdst->u.rt6, rt);
110
111 /* Sheit... I remember I did this right. Apparently, 101 /* Sheit... I remember I did this right. Apparently,
112 * it was magically lost, so this code needs audit */ 102 * it was magically lost, so this code needs audit */
113 xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | 103 xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
114 RTF_LOCAL); 104 RTF_LOCAL);
115 xdst->u.rt6.rt6i_metric = rt->rt6i_metric; 105 xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
116 xdst->u.rt6.rt6i_node = rt->rt6i_node; 106 xdst->u.rt6.rt6i_node = rt->rt6i_node;
117 if (rt->rt6i_node) 107 xdst->route_cookie = rt6_get_cookie(rt);
118 xdst->route_cookie = rt->rt6i_node->fn_sernum;
119 xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; 108 xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
120 xdst->u.rt6.rt6i_dst = rt->rt6i_dst; 109 xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
121 xdst->u.rt6.rt6i_src = rt->rt6i_src; 110 xdst->u.rt6.rt6i_src = rt->rt6i_src;
@@ -255,10 +244,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
255 if (likely(xdst->u.rt6.rt6i_idev)) 244 if (likely(xdst->u.rt6.rt6i_idev))
256 in6_dev_put(xdst->u.rt6.rt6i_idev); 245 in6_dev_put(xdst->u.rt6.rt6i_idev);
257 dst_destroy_metrics_generic(dst); 246 dst_destroy_metrics_generic(dst);
258 if (rt6_has_peer(&xdst->u.rt6)) {
259 struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
260 inet_putpeer(peer);
261 }
262 xfrm_dst_destroy(xdst); 247 xfrm_dst_destroy(xdst);
263} 248}
264 249
@@ -308,7 +293,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
308 .get_saddr = xfrm6_get_saddr, 293 .get_saddr = xfrm6_get_saddr,
309 .decode_session = _decode_session6, 294 .decode_session = _decode_session6,
310 .get_tos = xfrm6_get_tos, 295 .get_tos = xfrm6_get_tos,
311 .init_dst = xfrm6_init_dst,
312 .init_path = xfrm6_init_path, 296 .init_path = xfrm6_init_path,
313 .fill_dst = xfrm6_fill_dst, 297 .fill_dst = xfrm6_fill_dst,
314 .blackhole_route = ip6_blackhole_route, 298 .blackhole_route = ip6_blackhole_route,
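Several open-coded "if (rt->rt6i_node) cookie = rt->rt6i_node->fn_sernum" sites collapse into rt6_get_cookie(). The helper's body is not part of this hunk; a presumed shape, folding the NULL check into one place so callers cannot forget the no-node case:

#include <stdio.h>

struct fib6_node { unsigned int fn_sernum; };
struct rt6_info { struct fib6_node *rt6i_node; };

/* Presumed shape of the new helper: return the fib node's serial
 * number, or 0 when the route is detached from the tree.
 */
static unsigned int rt6_get_cookie(const struct rt6_info *rt)
{
	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
}

int main(void)
{
	struct fib6_node node = { .fn_sernum = 42 };
	struct rt6_info attached = { &node };
	struct rt6_info detached = { NULL };

	printf("%u %u\n", rt6_get_cookie(&attached), rt6_get_cookie(&detached));
	return 0;
}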
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 4ea5d7497b5f..48d0dc89b58d 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol,
1347 goto out; 1347 goto out;
1348 1348
1349 rc = -ENOMEM; 1349 rc = -ENOMEM;
1350 sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); 1350 sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern);
1351 if (!sk) 1351 if (!sk)
1352 goto out; 1352 goto out;
1353 1353
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ee0ea25c8e7a..fae6822cc367 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
1100 } 1100 }
1101 1101
1102 /* Allocate networking socket */ 1102 /* Allocate networking socket */
1103 sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); 1103 sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern);
1104 if (sk == NULL) 1104 if (sk == NULL)
1105 return -ENOMEM; 1105 return -ENOMEM;
1106 1106
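These create() handlers all gain a kern argument that is forwarded into sk_alloc(), letting the allocator distinguish kernel-internal sockets from user-created ones at allocation time. A toy sketch of the threading pattern, with a one-field struct standing in for struct sock:

#include <stdio.h>
#include <stdlib.h>

struct sock { int kern; };

/* sk_alloc() now takes the flag instead of guessing later. */
static struct sock *sk_alloc_sketch(int kern)
{
	struct sock *sk = calloc(1, sizeof(*sk));

	if (sk)
		sk->kern = kern;
	return sk;
}

/* A create() handler just forwards what the socket layer passed in. */
static struct sock *proto_create(int kern)
{
	return sk_alloc_sketch(kern);
}

int main(void)
{
	struct sock *user_sk = proto_create(0);
	struct sock *kern_sk = proto_create(1);

	if (!user_sk || !kern_sk)
		return 1;
	printf("user=%d kernel=%d\n", user_sk->kern, kern_sk->kern);
	free(user_sk);
	free(kern_sk);
	return 0;
}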
diff --git a/net/irda/timer.c b/net/irda/timer.c
index 0c4c115a5cab..f2280f73b057 100644
--- a/net/irda/timer.c
+++ b/net/irda/timer.c
@@ -60,8 +60,8 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
60 * to avoid interfering with incoming connection requests and 60 * to avoid interfering with incoming connection requests and
61 * to accommodate devices that perform discovery slower than us. 61 * to accommodate devices that perform discovery slower than us.
62 * Jean II */ 62 * Jean II */
63 timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s) 63 timeout = msecs_to_jiffies(sysctl_slot_timeout) * (S - s)
64 + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT); 64 + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT;
65 65
66 /* Set or re-set the timer. We reset the timer for each received 66 /* Set or re-set the timer. We reset the timer for each received
67 * discovery query, which allows us to automatically adjust to 67 * discovery query, which allows us to automatically adjust to
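Replacing the open-coded "sysctl_slot_timeout * HZ / 1000" with msecs_to_jiffies() also changes rounding: the helper rounds up, so sub-jiffy remainders no longer shorten the timeout. A sketch of the difference (the round-up formula approximates the kernel helper for HZ values that divide 1000):

#include <stdio.h>

#define HZ 100

/* Old open-coded form: truncates sub-jiffy remainders. */
static unsigned long old_conv(unsigned int msecs)
{
	return (unsigned long)msecs * HZ / 1000;
}

/* msecs_to_jiffies() rounds up so a timeout is never shortened. */
static unsigned long msecs_to_jiffies_sketch(unsigned int msecs)
{
	return ((unsigned long)msecs * HZ + 999) / 1000;
}

int main(void)
{
	printf("15ms: old=%lu jiffies, new=%lu jiffies\n",
	       old_conv(15), msecs_to_jiffies_sketch(15));
	return 0;
}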
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6daa52a18d40..918151c11348 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent)
535 sk->sk_type = parent->sk_type; 535 sk->sk_type = parent->sk_type;
536} 536}
537 537
538static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) 538static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern)
539{ 539{
540 struct sock *sk; 540 struct sock *sk;
541 struct iucv_sock *iucv; 541 struct iucv_sock *iucv;
542 542
543 sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); 543 sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern);
544 if (!sk) 544 if (!sk)
545 return NULL; 545 return NULL;
546 iucv = iucv_sk(sk); 546 iucv = iucv_sk(sk);
@@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
602 return -ESOCKTNOSUPPORT; 602 return -ESOCKTNOSUPPORT;
603 } 603 }
604 604
605 sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); 605 sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
606 if (!sk) 606 if (!sk)
607 return -ENOMEM; 607 return -ENOMEM;
608 608
@@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
1723 } 1723 }
1724 1724
1725 /* Create the new socket */ 1725 /* Create the new socket */
1726 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); 1726 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
1727 if (!nsk) { 1727 if (!nsk) {
1728 err = pr_iucv->path_sever(path, user_data); 1728 err = pr_iucv->path_sever(path, user_data);
1729 iucv_path_free(path); 1729 iucv_path_free(path);
@@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
1933 goto out; 1933 goto out;
1934 } 1934 }
1935 1935
1936 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); 1936 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
1937 bh_lock_sock(sk); 1937 bh_lock_sock(sk);
1938 if ((sk->sk_state != IUCV_LISTEN) || 1938 if ((sk->sk_state != IUCV_LISTEN) ||
1939 sk_acceptq_is_full(sk) || 1939 sk_acceptq_is_full(sk) ||
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f0d52d721b3a..b397f0aa9005 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
149 return -EPROTONOSUPPORT; 149 return -EPROTONOSUPPORT;
150 150
151 err = -ENOMEM; 151 err = -ENOMEM;
152 sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto); 152 sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
153 if (sk == NULL) 153 if (sk == NULL)
154 goto out; 154 goto out;
155 155
@@ -1190,6 +1190,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1190 memcpy(x->ealg->alg_key, key+1, keysize); 1190 memcpy(x->ealg->alg_key, key+1, keysize);
1191 } 1191 }
1192 x->props.ealgo = sa->sadb_sa_encrypt; 1192 x->props.ealgo = sa->sadb_sa_encrypt;
1193 x->geniv = a->uinfo.encr.geniv;
1193 } 1194 }
1194 } 1195 }
1195 /* x->algo.flags = sa->sadb_sa_flags; */ 1196 /* x->algo.flags = sa->sadb_sa_flags; */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a29a504492af..f6b090df3930 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1334,9 +1334,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
1334 if (sock) 1334 if (sock)
1335 inet_shutdown(sock, 2); 1335 inet_shutdown(sock, 2);
1336 } else { 1336 } else {
1337 if (sock) 1337 if (sock) {
1338 kernel_sock_shutdown(sock, SHUT_RDWR); 1338 kernel_sock_shutdown(sock, SHUT_RDWR);
1339 sk_release_kernel(sk); 1339 sock_release(sock);
1340 }
1340 } 1341 }
1341 1342
1342 l2tp_tunnel_sock_put(sk); 1343 l2tp_tunnel_sock_put(sk);
@@ -1399,13 +1400,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
1399 if (cfg->local_ip6 && cfg->peer_ip6) { 1400 if (cfg->local_ip6 && cfg->peer_ip6) {
1400 struct sockaddr_l2tpip6 ip6_addr = {0}; 1401 struct sockaddr_l2tpip6 ip6_addr = {0};
1401 1402
1402 err = sock_create_kern(AF_INET6, SOCK_DGRAM, 1403 err = sock_create_kern(net, AF_INET6, SOCK_DGRAM,
1403 IPPROTO_L2TP, &sock); 1404 IPPROTO_L2TP, &sock);
1404 if (err < 0) 1405 if (err < 0)
1405 goto out; 1406 goto out;
1406 1407
1407 sk_change_net(sock->sk, net);
1408
1409 ip6_addr.l2tp_family = AF_INET6; 1408 ip6_addr.l2tp_family = AF_INET6;
1410 memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, 1409 memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
1411 sizeof(ip6_addr.l2tp_addr)); 1410 sizeof(ip6_addr.l2tp_addr));
@@ -1429,13 +1428,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
1429 { 1428 {
1430 struct sockaddr_l2tpip ip_addr = {0}; 1429 struct sockaddr_l2tpip ip_addr = {0};
1431 1430
1432 err = sock_create_kern(AF_INET, SOCK_DGRAM, 1431 err = sock_create_kern(net, AF_INET, SOCK_DGRAM,
1433 IPPROTO_L2TP, &sock); 1432 IPPROTO_L2TP, &sock);
1434 if (err < 0) 1433 if (err < 0)
1435 goto out; 1434 goto out;
1436 1435
1437 sk_change_net(sock->sk, net);
1438
1439 ip_addr.l2tp_family = AF_INET; 1436 ip_addr.l2tp_family = AF_INET;
1440 ip_addr.l2tp_addr = cfg->local_ip; 1437 ip_addr.l2tp_addr = cfg->local_ip;
1441 ip_addr.l2tp_conn_id = tunnel_id; 1438 ip_addr.l2tp_conn_id = tunnel_id;
@@ -1462,7 +1459,7 @@ out:
1462 *sockp = sock; 1459 *sockp = sock;
1463 if ((err < 0) && sock) { 1460 if ((err < 0) && sock) {
1464 kernel_sock_shutdown(sock, SHUT_RDWR); 1461 kernel_sock_shutdown(sock, SHUT_RDWR);
1465 sk_release_kernel(sock->sk); 1462 sock_release(sock);
1466 *sockp = NULL; 1463 *sockp = NULL;
1467 } 1464 }
1468 1465
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e9b0dec56b8e..f56c9f69e9f2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
542 542
543/* socket() handler. Initialize a new struct sock. 543/* socket() handler. Initialize a new struct sock.
544 */ 544 */
545static int pppol2tp_create(struct net *net, struct socket *sock) 545static int pppol2tp_create(struct net *net, struct socket *sock, int kern)
546{ 546{
547 int error = -ENOMEM; 547 int error = -ENOMEM;
548 struct sock *sk; 548 struct sock *sk;
549 549
550 sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); 550 sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern);
551 if (!sk) 551 if (!sk)
552 goto out; 552 goto out;
553 553
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 17a8dff06090..8fd9febaa5ba 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol,
168 168
169 if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { 169 if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) {
170 rc = -ENOMEM; 170 rc = -ENOMEM;
171 sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); 171 sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern);
172 if (sk) { 172 if (sk) {
173 rc = 0; 173 rc = 0;
174 llc_ui_sk_init(sock, sk); 174 llc_ui_sk_init(sock, sk);
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 81a61fce3afb..3e821daf9dd4 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk,
768 struct llc_addr *daddr) 768 struct llc_addr *daddr)
769{ 769{
770 struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC, 770 struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC,
771 sk->sk_prot); 771 sk->sk_prot, 0);
772 struct llc_sock *newllc, *llc = llc_sk(sk); 772 struct llc_sock *newllc, *llc = llc_sk(sk);
773 773
774 if (!newsk) 774 if (!newsk)
@@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk)
931 * Allocates a LLC sock and initializes it. Returns the new LLC sock 931 * Allocates a LLC sock and initializes it. Returns the new LLC sock
932 * or %NULL if there's no memory available for one 932 * or %NULL if there's no memory available for one
933 */ 933 */
934struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) 934struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern)
935{ 935{
936 struct sock *sk = sk_alloc(net, family, priority, prot); 936 struct sock *sk = sk_alloc(net, family, priority, prot, kern);
937 937
938 if (!sk) 938 if (!sk)
939 goto out; 939 goto out;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 64a012a0c6e5..086de496a4c1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -302,6 +302,20 @@ config MAC80211_DEBUG_COUNTERS
302 ---help--- 302 ---help---
303 Selecting this option causes mac80211 to keep additional 303 Selecting this option causes mac80211 to keep additional
304 and very verbose statistics about TX and RX handler use 304 and very verbose statistics about TX and RX handler use
305 and show them in debugfs. 305 as well as a few selected dot11 counters. These will be
306 exposed in debugfs.
307
308 Note that some of the counters are not concurrency safe
309 and may thus not always be accurate.
306 310
307 If unsure, say N. 311 If unsure, say N.
312
313config MAC80211_STA_HASH_MAX_SIZE
314 int "Station hash table maximum size" if MAC80211_DEBUG_MENU
315 default 0
316 ---help---
317 Setting this option to a low value (e.g. 4) allows testing the
318 hash table with collisions relatively deterministically (just
318 connect more stations than the number selected here).
320
321 If unsure, leave the default of 0.
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 208df7c0b6ea..7663c28ba353 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -11,9 +11,8 @@
11 11
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/crypto.h>
15#include <linux/err.h> 14#include <linux/err.h>
16#include <crypto/aes.h> 15#include <crypto/aead.h>
17 16
18#include <net/mac80211.h> 17#include <net/mac80211.h>
19#include "key.h" 18#include "key.h"
@@ -23,7 +22,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
23 u8 *data, size_t data_len, u8 *mic, 22 u8 *data, size_t data_len, u8 *mic,
24 size_t mic_len) 23 size_t mic_len)
25{ 24{
26 struct scatterlist assoc, pt, ct[2]; 25 struct scatterlist sg[3];
27 26
28 char aead_req_data[sizeof(struct aead_request) + 27 char aead_req_data[sizeof(struct aead_request) +
29 crypto_aead_reqsize(tfm)] 28 crypto_aead_reqsize(tfm)]
@@ -32,15 +31,14 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
32 31
33 memset(aead_req, 0, sizeof(aead_req_data)); 32 memset(aead_req, 0, sizeof(aead_req_data));
34 33
35 sg_init_one(&pt, data, data_len); 34 sg_init_table(sg, 3);
36 sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); 35 sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
37 sg_init_table(ct, 2); 36 sg_set_buf(&sg[1], data, data_len);
38 sg_set_buf(&ct[0], data, data_len); 37 sg_set_buf(&sg[2], mic, mic_len);
39 sg_set_buf(&ct[1], mic, mic_len);
40 38
41 aead_request_set_tfm(aead_req, tfm); 39 aead_request_set_tfm(aead_req, tfm);
42 aead_request_set_assoc(aead_req, &assoc, assoc.length); 40 aead_request_set_crypt(aead_req, sg, sg, data_len, b_0);
43 aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0); 41 aead_request_set_ad(aead_req, sg[0].length);
44 42
45 crypto_aead_encrypt(aead_req); 43 crypto_aead_encrypt(aead_req);
46} 44}
@@ -49,7 +47,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
49 u8 *data, size_t data_len, u8 *mic, 47 u8 *data, size_t data_len, u8 *mic,
50 size_t mic_len) 48 size_t mic_len)
51{ 49{
52 struct scatterlist assoc, pt, ct[2]; 50 struct scatterlist sg[3];
53 char aead_req_data[sizeof(struct aead_request) + 51 char aead_req_data[sizeof(struct aead_request) +
54 crypto_aead_reqsize(tfm)] 52 crypto_aead_reqsize(tfm)]
55 __aligned(__alignof__(struct aead_request)); 53 __aligned(__alignof__(struct aead_request));
@@ -60,15 +58,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
60 58
61 memset(aead_req, 0, sizeof(aead_req_data)); 59 memset(aead_req, 0, sizeof(aead_req_data));
62 60
63 sg_init_one(&pt, data, data_len); 61 sg_init_table(sg, 3);
64 sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); 62 sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
65 sg_init_table(ct, 2); 63 sg_set_buf(&sg[1], data, data_len);
66 sg_set_buf(&ct[0], data, data_len); 64 sg_set_buf(&sg[2], mic, mic_len);
67 sg_set_buf(&ct[1], mic, mic_len);
68 65
69 aead_request_set_tfm(aead_req, tfm); 66 aead_request_set_tfm(aead_req, tfm);
70 aead_request_set_assoc(aead_req, &assoc, assoc.length); 67 aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0);
71 aead_request_set_crypt(aead_req, ct, &pt, data_len + mic_len, b_0); 68 aead_request_set_ad(aead_req, sg[0].length);
72 69
73 return crypto_aead_decrypt(aead_req); 70 return crypto_aead_decrypt(aead_req);
74} 71}
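The AEAD conversion in aes_ccm.c (and aes_gcm.c below) moves from separate assoc/plaintext/ciphertext scatterlists to one list covering AAD | payload | MIC, with aead_request_set_ad() declaring how much of the front is associated data. A layout-only sketch of that convention, with a plain struct standing in for struct scatterlist:

#include <stdio.h>
#include <string.h>

struct sg_sketch { const void *buf; size_t len; };

static size_t total_len(const struct sg_sketch *sg, int n)
{
	size_t sum = 0;

	for (int i = 0; i < n; i++)
		sum += sg[i].len;
	return sum;
}

int main(void)
{
	unsigned char aad[22], data[64], mic[8];
	/* One view covering AAD | payload | MIC; the AAD prefix length
	 * is what aead_request_set_ad() declares in the new API.
	 */
	struct sg_sketch sg[3] = {
		{ aad,  sizeof(aad)  },  /* associated data */
		{ data, sizeof(data) },  /* payload to encrypt */
		{ mic,  sizeof(mic)  },  /* authentication tag */
	};

	memset(aad, 0, sizeof(aad));
	/* crypt length for encrypt covers the payload only */
	printf("ad=%zu crypt=%zu total=%zu\n",
	       sg[0].len, sg[1].len, total_len(sg, 3));
	return 0;
}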
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
index fd278bbe1b0d..3afe361fd27c 100644
--- a/net/mac80211/aes_gcm.c
+++ b/net/mac80211/aes_gcm.c
@@ -8,9 +8,8 @@
8 8
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/crypto.h>
12#include <linux/err.h> 11#include <linux/err.h>
13#include <crypto/aes.h> 12#include <crypto/aead.h>
14 13
15#include <net/mac80211.h> 14#include <net/mac80211.h>
16#include "key.h" 15#include "key.h"
@@ -19,7 +18,7 @@
19void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, 18void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
20 u8 *data, size_t data_len, u8 *mic) 19 u8 *data, size_t data_len, u8 *mic)
21{ 20{
22 struct scatterlist assoc, pt, ct[2]; 21 struct scatterlist sg[3];
23 22
24 char aead_req_data[sizeof(struct aead_request) + 23 char aead_req_data[sizeof(struct aead_request) +
25 crypto_aead_reqsize(tfm)] 24 crypto_aead_reqsize(tfm)]
@@ -28,15 +27,14 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
28 27
29 memset(aead_req, 0, sizeof(aead_req_data)); 28 memset(aead_req, 0, sizeof(aead_req_data));
30 29
31 sg_init_one(&pt, data, data_len); 30 sg_init_table(sg, 3);
32 sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); 31 sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
33 sg_init_table(ct, 2); 32 sg_set_buf(&sg[1], data, data_len);
34 sg_set_buf(&ct[0], data, data_len); 33 sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
35 sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN);
36 34
37 aead_request_set_tfm(aead_req, tfm); 35 aead_request_set_tfm(aead_req, tfm);
38 aead_request_set_assoc(aead_req, &assoc, assoc.length); 36 aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
39 aead_request_set_crypt(aead_req, &pt, ct, data_len, j_0); 37 aead_request_set_ad(aead_req, sg[0].length);
40 38
41 crypto_aead_encrypt(aead_req); 39 crypto_aead_encrypt(aead_req);
42} 40}
@@ -44,7 +42,7 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
44int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, 42int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
45 u8 *data, size_t data_len, u8 *mic) 43 u8 *data, size_t data_len, u8 *mic)
46{ 44{
47 struct scatterlist assoc, pt, ct[2]; 45 struct scatterlist sg[3];
48 char aead_req_data[sizeof(struct aead_request) + 46 char aead_req_data[sizeof(struct aead_request) +
49 crypto_aead_reqsize(tfm)] 47 crypto_aead_reqsize(tfm)]
50 __aligned(__alignof__(struct aead_request)); 48 __aligned(__alignof__(struct aead_request));
@@ -55,16 +53,15 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
55 53
56 memset(aead_req, 0, sizeof(aead_req_data)); 54 memset(aead_req, 0, sizeof(aead_req_data));
57 55
58 sg_init_one(&pt, data, data_len); 56 sg_init_table(sg, 3);
59 sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); 57 sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
60 sg_init_table(ct, 2); 58 sg_set_buf(&sg[1], data, data_len);
61 sg_set_buf(&ct[0], data, data_len); 59 sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
62 sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN);
63 60
64 aead_request_set_tfm(aead_req, tfm); 61 aead_request_set_tfm(aead_req, tfm);
65 aead_request_set_assoc(aead_req, &assoc, assoc.length); 62 aead_request_set_crypt(aead_req, sg, sg,
66 aead_request_set_crypt(aead_req, ct, &pt,
67 data_len + IEEE80211_GCMP_MIC_LEN, j_0); 63 data_len + IEEE80211_GCMP_MIC_LEN, j_0);
64 aead_request_set_ad(aead_req, sg[0].length);
68 65
69 return crypto_aead_decrypt(aead_req); 66 return crypto_aead_decrypt(aead_req);
70} 67}
diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c
index f1321b7d6506..3ddd927aaf30 100644
--- a/net/mac80211/aes_gmac.c
+++ b/net/mac80211/aes_gmac.c
@@ -9,8 +9,8 @@
9 9
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/crypto.h>
13#include <linux/err.h> 12#include <linux/err.h>
13#include <crypto/aead.h>
14#include <crypto/aes.h> 14#include <crypto/aes.h>
15 15
16#include <net/mac80211.h> 16#include <net/mac80211.h>
@@ -24,7 +24,7 @@
24int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, 24int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
25 const u8 *data, size_t data_len, u8 *mic) 25 const u8 *data, size_t data_len, u8 *mic)
26{ 26{
27 struct scatterlist sg[3], ct[1]; 27 struct scatterlist sg[4];
28 char aead_req_data[sizeof(struct aead_request) + 28 char aead_req_data[sizeof(struct aead_request) +
29 crypto_aead_reqsize(tfm)] 29 crypto_aead_reqsize(tfm)]
30 __aligned(__alignof__(struct aead_request)); 30 __aligned(__alignof__(struct aead_request));
@@ -37,21 +37,19 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
37 memset(aead_req, 0, sizeof(aead_req_data)); 37 memset(aead_req, 0, sizeof(aead_req_data));
38 38
39 memset(zero, 0, GMAC_MIC_LEN); 39 memset(zero, 0, GMAC_MIC_LEN);
40 sg_init_table(sg, 3); 40 sg_init_table(sg, 4);
41 sg_set_buf(&sg[0], aad, AAD_LEN); 41 sg_set_buf(&sg[0], aad, AAD_LEN);
42 sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); 42 sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN);
43 sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); 43 sg_set_buf(&sg[2], zero, GMAC_MIC_LEN);
44 sg_set_buf(&sg[3], mic, GMAC_MIC_LEN);
44 45
45 memcpy(iv, nonce, GMAC_NONCE_LEN); 46 memcpy(iv, nonce, GMAC_NONCE_LEN);
46 memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN); 47 memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN);
47 iv[AES_BLOCK_SIZE - 1] = 0x01; 48 iv[AES_BLOCK_SIZE - 1] = 0x01;
48 49
49 sg_init_table(ct, 1);
50 sg_set_buf(&ct[0], mic, GMAC_MIC_LEN);
51
52 aead_request_set_tfm(aead_req, tfm); 50 aead_request_set_tfm(aead_req, tfm);
53 aead_request_set_assoc(aead_req, sg, AAD_LEN + data_len); 51 aead_request_set_crypt(aead_req, sg, sg, 0, iv);
54 aead_request_set_crypt(aead_req, NULL, ct, 0, iv); 52 aead_request_set_ad(aead_req, AAD_LEN + data_len);
55 53
56 crypto_aead_encrypt(aead_req); 54 crypto_aead_encrypt(aead_req);
57 55
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index cce9d425c718..c8ba2e77737c 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -564,8 +564,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
564 return -EINVAL; 564 return -EINVAL;
565 565
566 if ((tid >= IEEE80211_NUM_TIDS) || 566 if ((tid >= IEEE80211_NUM_TIDS) ||
567 !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) || 567 !ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) ||
568 (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) 568 ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW))
569 return -EINVAL; 569 return -EINVAL;
570 570
571 ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", 571 ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
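ieee80211_hw_check() replaces the direct "&" tests because hw.flags is now a bitmap rather than a u32 mask, so the flag space is no longer capped at 32 bits. A sketch of the bitmap test, with hypothetical flag values:

#include <stdio.h>

#define NUM_HW_FLAGS 40
#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define FLAG_WORDS ((NUM_HW_FLAGS + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Flag indices here are made up for the sketch. */
enum hw_flag { AMPDU_AGGREGATION = 7, TX_AMPDU_SETUP_IN_HW = 21 };

struct hw { unsigned long flags[FLAG_WORDS]; };

static void set_flag(struct hw *hw, int f)
{
	hw->flags[f / BITS_PER_LONG] |= 1UL << (f % BITS_PER_LONG);
}

/* Analogue of ieee80211_hw_check(): test one bit in the bitmap. */
static int hw_check(const struct hw *hw, int f)
{
	return (hw->flags[f / BITS_PER_LONG] >> (f % BITS_PER_LONG)) & 1;
}

int main(void)
{
	struct hw hw = { { 0 } };

	set_flag(&hw, AMPDU_AGGREGATION);
	printf("ampdu=%d setup_in_hw=%d\n",
	       hw_check(&hw, AMPDU_AGGREGATION),
	       hw_check(&hw, TX_AMPDU_SETUP_IN_HW));
	return 0;
}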
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ff347a0eebd4..bf7023f6c327 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2,7 +2,7 @@
2 * mac80211 configuration hooks for cfg80211 2 * mac80211 configuration hooks for cfg80211
3 * 3 *
4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
5 * Copyright 2013-2014 Intel Mobile Communications GmbH 5 * Copyright 2013-2015 Intel Mobile Communications GmbH
6 * 6 *
7 * This file is GPLv2 as found in COPYING. 7 * This file is GPLv2 as found in COPYING.
8 */ 8 */
@@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy,
137 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 137 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
138 138
139 sdata->noack_map = noack_map; 139 sdata->noack_map = noack_map;
140
141 ieee80211_check_fast_xmit_iface(sdata);
142
140 return 0; 143 return 0;
141} 144}
142 145
@@ -309,6 +312,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
309 u32 iv32; 312 u32 iv32;
310 u16 iv16; 313 u16 iv16;
311 int err = -ENOENT; 314 int err = -ENOENT;
315 struct ieee80211_key_seq kseq = {};
312 316
313 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 317 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
314 318
@@ -339,10 +343,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
339 iv32 = key->u.tkip.tx.iv32; 343 iv32 = key->u.tkip.tx.iv32;
340 iv16 = key->u.tkip.tx.iv16; 344 iv16 = key->u.tkip.tx.iv16;
341 345
342 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) 346 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
343 drv_get_tkip_seq(sdata->local, 347 !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
344 key->conf.hw_key_idx, 348 drv_get_key_seq(sdata->local, key, &kseq);
345 &iv32, &iv16); 349 iv32 = kseq.tkip.iv32;
350 iv16 = kseq.tkip.iv16;
351 }
346 352
347 seq[0] = iv16 & 0xff; 353 seq[0] = iv16 & 0xff;
348 seq[1] = (iv16 >> 8) & 0xff; 354 seq[1] = (iv16 >> 8) & 0xff;
@@ -355,52 +361,44 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
355 break; 361 break;
356 case WLAN_CIPHER_SUITE_CCMP: 362 case WLAN_CIPHER_SUITE_CCMP:
357 case WLAN_CIPHER_SUITE_CCMP_256: 363 case WLAN_CIPHER_SUITE_CCMP_256:
358 pn64 = atomic64_read(&key->u.ccmp.tx_pn);
359 seq[0] = pn64;
360 seq[1] = pn64 >> 8;
361 seq[2] = pn64 >> 16;
362 seq[3] = pn64 >> 24;
363 seq[4] = pn64 >> 32;
364 seq[5] = pn64 >> 40;
365 params.seq = seq;
366 params.seq_len = 6;
367 break;
368 case WLAN_CIPHER_SUITE_AES_CMAC: 364 case WLAN_CIPHER_SUITE_AES_CMAC:
369 case WLAN_CIPHER_SUITE_BIP_CMAC_256: 365 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
370 pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); 366 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
371 seq[0] = pn64; 367 offsetof(typeof(kseq), aes_cmac));
372 seq[1] = pn64 >> 8;
373 seq[2] = pn64 >> 16;
374 seq[3] = pn64 >> 24;
375 seq[4] = pn64 >> 32;
376 seq[5] = pn64 >> 40;
377 params.seq = seq;
378 params.seq_len = 6;
379 break;
380 case WLAN_CIPHER_SUITE_BIP_GMAC_128: 368 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
381 case WLAN_CIPHER_SUITE_BIP_GMAC_256: 369 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
382 pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); 370 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
383 seq[0] = pn64; 371 offsetof(typeof(kseq), aes_gmac));
384 seq[1] = pn64 >> 8;
385 seq[2] = pn64 >> 16;
386 seq[3] = pn64 >> 24;
387 seq[4] = pn64 >> 32;
388 seq[5] = pn64 >> 40;
389 params.seq = seq;
390 params.seq_len = 6;
391 break;
392 case WLAN_CIPHER_SUITE_GCMP: 372 case WLAN_CIPHER_SUITE_GCMP:
393 case WLAN_CIPHER_SUITE_GCMP_256: 373 case WLAN_CIPHER_SUITE_GCMP_256:
394 pn64 = atomic64_read(&key->u.gcmp.tx_pn); 374 BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
395 seq[0] = pn64; 375 offsetof(typeof(kseq), gcmp));
396 seq[1] = pn64 >> 8; 376
397 seq[2] = pn64 >> 16; 377 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
398 seq[3] = pn64 >> 24; 378 !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
399 seq[4] = pn64 >> 32; 379 drv_get_key_seq(sdata->local, key, &kseq);
400 seq[5] = pn64 >> 40; 380 memcpy(seq, kseq.ccmp.pn, 6);
381 } else {
382 pn64 = atomic64_read(&key->conf.tx_pn);
383 seq[0] = pn64;
384 seq[1] = pn64 >> 8;
385 seq[2] = pn64 >> 16;
386 seq[3] = pn64 >> 24;
387 seq[4] = pn64 >> 32;
388 seq[5] = pn64 >> 40;
389 }
401 params.seq = seq; 390 params.seq = seq;
402 params.seq_len = 6; 391 params.seq_len = 6;
403 break; 392 break;
393 default:
394 if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
395 break;
396 if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
397 break;
398 drv_get_key_seq(sdata->local, key, &kseq);
399 params.seq = kseq.hw.seq;
400 params.seq_len = kseq.hw.seq_len;
401 break;
404 } 402 }
405 403
406 params.key = key->conf.key; 404 params.key = key->conf.key;
@@ -1372,6 +1370,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
1372 } 1370 }
1373 1371
1374 sta->sdata = vlansdata; 1372 sta->sdata = vlansdata;
1373 ieee80211_check_fast_xmit(sta);
1375 1374
1376 if (sta->sta_state == IEEE80211_STA_AUTHORIZED && 1375 if (sta->sta_state == IEEE80211_STA_AUTHORIZED &&
1377 prev_4addr != new_4addr) { 1376 prev_4addr != new_4addr) {
@@ -1764,7 +1763,7 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
1764 /* our RSSI threshold implementation is supported only for 1763 /* our RSSI threshold implementation is supported only for
1765 * devices that report signal in dBm. 1764 * devices that report signal in dBm.
1766 */ 1765 */
1767 if (!(sdata->local->hw.flags & IEEE80211_HW_SIGNAL_DBM)) 1766 if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
1768 return -ENOTSUPP; 1767 return -ENOTSUPP;
1769 conf->rssi_threshold = nconf->rssi_threshold; 1768 conf->rssi_threshold = nconf->rssi_threshold;
1770 } 1769 }
@@ -2099,10 +2098,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
2099 int err; 2098 int err;
2100 2099
2101 if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { 2100 if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
2101 ieee80211_check_fast_xmit_all(local);
2102
2102 err = drv_set_frag_threshold(local, wiphy->frag_threshold); 2103 err = drv_set_frag_threshold(local, wiphy->frag_threshold);
2103 2104
2104 if (err) 2105 if (err) {
2106 ieee80211_check_fast_xmit_all(local);
2105 return err; 2107 return err;
2108 }
2106 } 2109 }
2107 2110
2108 if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || 2111 if ((changed & WIPHY_PARAM_COVERAGE_CLASS) ||
@@ -2404,7 +2407,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
2404 if (sdata->vif.type != NL80211_IFTYPE_STATION) 2407 if (sdata->vif.type != NL80211_IFTYPE_STATION)
2405 return -EOPNOTSUPP; 2408 return -EOPNOTSUPP;
2406 2409
2407 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) 2410 if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS))
2408 return -EOPNOTSUPP; 2411 return -EOPNOTSUPP;
2409 2412
2410 if (enabled == sdata->u.mgd.powersave && 2413 if (enabled == sdata->u.mgd.powersave &&
@@ -2419,7 +2422,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
2419 __ieee80211_request_smps_mgd(sdata, sdata->u.mgd.req_smps); 2422 __ieee80211_request_smps_mgd(sdata, sdata->u.mgd.req_smps);
2420 sdata_unlock(sdata); 2423 sdata_unlock(sdata);
2421 2424
2422 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) 2425 if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
2423 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 2426 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
2424 2427
2425 ieee80211_recalc_ps(local, -1); 2428 ieee80211_recalc_ps(local, -1);
@@ -2463,7 +2466,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
2463 if (!ieee80211_sdata_running(sdata)) 2466 if (!ieee80211_sdata_running(sdata))
2464 return -ENETDOWN; 2467 return -ENETDOWN;
2465 2468
2466 if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) { 2469 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
2467 ret = drv_set_bitrate_mask(local, sdata, mask); 2470 ret = drv_set_bitrate_mask(local, sdata, mask);
2468 if (ret) 2471 if (ret)
2469 return ret; 2472 return ret;
@@ -2514,6 +2517,19 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local,
2514 return true; 2517 return true;
2515} 2518}
2516 2519
2520static u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local)
2521{
2522 lockdep_assert_held(&local->mtx);
2523
2524 local->roc_cookie_counter++;
2525
2526 /* wow, you wrapped 64 bits ... more likely a bug */
2527 if (WARN_ON(local->roc_cookie_counter == 0))
2528 local->roc_cookie_counter++;
2529
2530 return local->roc_cookie_counter;
2531}
2532
2517static int ieee80211_start_roc_work(struct ieee80211_local *local, 2533static int ieee80211_start_roc_work(struct ieee80211_local *local,
2518 struct ieee80211_sub_if_data *sdata, 2534 struct ieee80211_sub_if_data *sdata,
2519 struct ieee80211_channel *channel, 2535 struct ieee80211_channel *channel,
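The cookie bump-and-check is factored into ieee80211_mgmt_tx_cookie() so the ROC and mgmt-TX paths share it; the counter skips 0 on 64-bit wrap so a zero cookie keeps meaning "none". A sketch of that invariant:

#include <stdio.h>
#include <stdint.h>

/* Analogue of ieee80211_mgmt_tx_cookie(): monotonic and never 0. */
static uint64_t next_cookie(uint64_t *counter)
{
	if (++*counter == 0)  /* 64-bit wrap: skip the reserved value */
		++*counter;
	return *counter;
}

int main(void)
{
	uint64_t c = UINT64_MAX;   /* force the wrap case */

	printf("cookie after wrap: %llu\n",
	       (unsigned long long)next_cookie(&c));
	return 0;
}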
@@ -2551,7 +2567,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2551 roc->req_duration = duration; 2567 roc->req_duration = duration;
2552 roc->frame = txskb; 2568 roc->frame = txskb;
2553 roc->type = type; 2569 roc->type = type;
2554 roc->mgmt_tx_cookie = (unsigned long)txskb;
2555 roc->sdata = sdata; 2570 roc->sdata = sdata;
2556 INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); 2571 INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
2557 INIT_LIST_HEAD(&roc->dependents); 2572 INIT_LIST_HEAD(&roc->dependents);
@@ -2561,17 +2576,10 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2561 * or the SKB (for mgmt TX) 2576 * or the SKB (for mgmt TX)
2562 */ 2577 */
2563 if (!txskb) { 2578 if (!txskb) {
2564 /* local->mtx protects this */ 2579 roc->cookie = ieee80211_mgmt_tx_cookie(local);
2565 local->roc_cookie_counter++;
2566 roc->cookie = local->roc_cookie_counter;
2567 /* wow, you wrapped 64 bits ... more likely a bug */
2568 if (WARN_ON(roc->cookie == 0)) {
2569 roc->cookie = 1;
2570 local->roc_cookie_counter++;
2571 }
2572 *cookie = roc->cookie; 2580 *cookie = roc->cookie;
2573 } else { 2581 } else {
2574 *cookie = (unsigned long)txskb; 2582 roc->mgmt_tx_cookie = *cookie;
2575 } 2583 }
2576 2584
2577 /* if there's one pending or we're scanning, queue this one */ 2585 /* if there's one pending or we're scanning, queue this one */
@@ -3244,13 +3252,43 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
3244 return err; 3252 return err;
3245} 3253}
3246 3254
3255static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local,
3256 struct sk_buff *skb, u64 *cookie,
3257 gfp_t gfp)
3258{
3259 unsigned long spin_flags;
3260 struct sk_buff *ack_skb;
3261 int id;
3262
3263 ack_skb = skb_copy(skb, gfp);
3264 if (!ack_skb)
3265 return ERR_PTR(-ENOMEM);
3266
3267 spin_lock_irqsave(&local->ack_status_lock, spin_flags);
3268 id = idr_alloc(&local->ack_status_frames, ack_skb,
3269 1, 0x10000, GFP_ATOMIC);
3270 spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
3271
3272 if (id < 0) {
3273 kfree_skb(ack_skb);
3274 return ERR_PTR(-ENOMEM);
3275 }
3276
3277 IEEE80211_SKB_CB(skb)->ack_frame_id = id;
3278
3279 *cookie = ieee80211_mgmt_tx_cookie(local);
3280 IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie;
3281
3282 return ack_skb;
3283}
3284
3247static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, 3285static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
3248 struct cfg80211_mgmt_tx_params *params, 3286 struct cfg80211_mgmt_tx_params *params,
3249 u64 *cookie) 3287 u64 *cookie)
3250{ 3288{
3251 struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); 3289 struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
3252 struct ieee80211_local *local = sdata->local; 3290 struct ieee80211_local *local = sdata->local;
3253 struct sk_buff *skb; 3291 struct sk_buff *skb, *ack_skb;
3254 struct sta_info *sta; 3292 struct sta_info *sta;
3255 const struct ieee80211_mgmt *mgmt = (void *)params->buf; 3293 const struct ieee80211_mgmt *mgmt = (void *)params->buf;
3256 bool need_offchan = false; 3294 bool need_offchan = false;
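ieee80211_make_ack_skb() above copies the outgoing frame, registers the copy under an id (ids start at 1, so 0 can keep meaning "no status frame pending") and stores that id in the skb's control block for the TX-status path. A reduced sketch of the id-allocation step, with a plain table standing in for the kernel IDR:

#include <stdio.h>
#include <string.h>

#define MAX_PENDING 8

struct frame { char payload[16]; };

static struct frame *pending[MAX_PENDING];

/* Stand-in for idr_alloc(): find a free slot; ids start at 1 so 0
 * stays reserved for "no status frame pending".
 */
static int track_ack(struct frame *copy)
{
	for (int id = 1; id < MAX_PENDING; id++) {
		if (!pending[id]) {
			pending[id] = copy;
			return id;
		}
	}
	return -1;   /* table full, caller frees the copy */
}

int main(void)
{
	struct frame copy;

	strcpy(copy.payload, "probe");
	printf("ack_frame_id=%d\n", track_ack(&copy));
	return 0;
}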
@@ -3299,8 +3337,14 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
3299 break; 3337 break;
3300 case NL80211_IFTYPE_STATION: 3338 case NL80211_IFTYPE_STATION:
3301 case NL80211_IFTYPE_P2P_CLIENT: 3339 case NL80211_IFTYPE_P2P_CLIENT:
3302 if (!sdata->u.mgd.associated) 3340 sdata_lock(sdata);
3341 if (!sdata->u.mgd.associated ||
3342 (params->offchan && params->wait &&
3343 local->ops->remain_on_channel &&
3344 memcmp(sdata->u.mgd.associated->bssid,
3345 mgmt->bssid, ETH_ALEN)))
3303 need_offchan = true; 3346 need_offchan = true;
3347 sdata_unlock(sdata);
3304 break; 3348 break;
3305 case NL80211_IFTYPE_P2P_DEVICE: 3349 case NL80211_IFTYPE_P2P_DEVICE:
3306 need_offchan = true; 3350 need_offchan = true;
@@ -3356,6 +3400,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
3356 /* Update CSA counters */ 3400 /* Update CSA counters */
3357 if (sdata->vif.csa_active && 3401 if (sdata->vif.csa_active &&
3358 (sdata->vif.type == NL80211_IFTYPE_AP || 3402 (sdata->vif.type == NL80211_IFTYPE_AP ||
3403 sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
3359 sdata->vif.type == NL80211_IFTYPE_ADHOC) && 3404 sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
3360 params->n_csa_offsets) { 3405 params->n_csa_offsets) {
3361 int i; 3406 int i;
@@ -3382,8 +3427,23 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
3382 3427
3383 skb->dev = sdata->dev; 3428 skb->dev = sdata->dev;
3384 3429
3430 if (!params->dont_wait_for_ack) {
3431 /* make a copy to preserve the frame contents
3432 * in case of encryption.
3433 */
3434 ack_skb = ieee80211_make_ack_skb(local, skb, cookie,
3435 GFP_KERNEL);
3436 if (IS_ERR(ack_skb)) {
3437 ret = PTR_ERR(ack_skb);
3438 kfree_skb(skb);
3439 goto out_unlock;
3440 }
3441 } else {
3442 /* for cookie below */
3443 ack_skb = skb;
3444 }
3445
3385 if (!need_offchan) { 3446 if (!need_offchan) {
3386 *cookie = (unsigned long) skb;
3387 ieee80211_tx_skb(sdata, skb); 3447 ieee80211_tx_skb(sdata, skb);
3388 ret = 0; 3448 ret = 0;
3389 goto out_unlock; 3449 goto out_unlock;
@@ -3391,7 +3451,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
3391 3451
3392 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | 3452 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN |
3393 IEEE80211_TX_INTFL_OFFCHAN_TX_OK; 3453 IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
3394 if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) 3454 if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
3395 IEEE80211_SKB_CB(skb)->hw_queue = 3455 IEEE80211_SKB_CB(skb)->hw_queue =
3396 local->hw.offchannel_tx_hw_queue; 3456 local->hw.offchannel_tx_hw_queue;
3397 3457
@@ -3476,7 +3536,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
3476 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 3536 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
3477 struct ieee80211_local *local = sdata->local; 3537 struct ieee80211_local *local = sdata->local;
3478 struct ieee80211_qos_hdr *nullfunc; 3538 struct ieee80211_qos_hdr *nullfunc;
3479 struct sk_buff *skb; 3539 struct sk_buff *skb, *ack_skb;
3480 int size = sizeof(*nullfunc); 3540 int size = sizeof(*nullfunc);
3481 __le16 fc; 3541 __le16 fc;
3482 bool qos; 3542 bool qos;
@@ -3484,20 +3544,24 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
3484 struct sta_info *sta; 3544 struct sta_info *sta;
3485 struct ieee80211_chanctx_conf *chanctx_conf; 3545 struct ieee80211_chanctx_conf *chanctx_conf;
3486 enum ieee80211_band band; 3546 enum ieee80211_band band;
3547 int ret;
3548
3549 /* the lock is needed to assign the cookie later */
3550 mutex_lock(&local->mtx);
3487 3551
3488 rcu_read_lock(); 3552 rcu_read_lock();
3489 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); 3553 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
3490 if (WARN_ON(!chanctx_conf)) { 3554 if (WARN_ON(!chanctx_conf)) {
3491 rcu_read_unlock(); 3555 ret = -EINVAL;
3492 return -EINVAL; 3556 goto unlock;
3493 } 3557 }
3494 band = chanctx_conf->def.chan->band; 3558 band = chanctx_conf->def.chan->band;
3495 sta = sta_info_get_bss(sdata, peer); 3559 sta = sta_info_get_bss(sdata, peer);
3496 if (sta) { 3560 if (sta) {
3497 qos = sta->sta.wme; 3561 qos = sta->sta.wme;
3498 } else { 3562 } else {
3499 rcu_read_unlock(); 3563 ret = -ENOLINK;
3500 return -ENOLINK; 3564 goto unlock;
3501 } 3565 }
3502 3566
3503 if (qos) { 3567 if (qos) {
@@ -3513,8 +3577,8 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
3513 3577
3514 skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); 3578 skb = dev_alloc_skb(local->hw.extra_tx_headroom + size);
3515 if (!skb) { 3579 if (!skb) {
3516 rcu_read_unlock(); 3580 ret = -ENOMEM;
3517 return -ENOMEM; 3581 goto unlock;
3518 } 3582 }
3519 3583
3520 skb->dev = dev; 3584 skb->dev = dev;
@@ -3540,13 +3604,23 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
3540 if (qos) 3604 if (qos)
3541 nullfunc->qos_ctrl = cpu_to_le16(7); 3605 nullfunc->qos_ctrl = cpu_to_le16(7);
3542 3606
3607 ack_skb = ieee80211_make_ack_skb(local, skb, cookie, GFP_ATOMIC);
3608 if (IS_ERR(ack_skb)) {
3609 kfree_skb(skb);
3610 ret = PTR_ERR(ack_skb);
3611 goto unlock;
3612 }
3613
3543 local_bh_disable(); 3614 local_bh_disable();
3544 ieee80211_xmit(sdata, sta, skb); 3615 ieee80211_xmit(sdata, sta, skb);
3545 local_bh_enable(); 3616 local_bh_enable();
3617
3618 ret = 0;
3619unlock:
3546 rcu_read_unlock(); 3620 rcu_read_unlock();
3621 mutex_unlock(&local->mtx);
3547 3622
3548 *cookie = (unsigned long) skb; 3623 return ret;
3549 return 0;
3550} 3624}
3551 3625
3552static int ieee80211_cfg_get_channel(struct wiphy *wiphy, 3626static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5bcd4e5589d3..f01c18a3160e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -664,6 +664,8 @@ out:
664 ieee80211_bss_info_change_notify(sdata, 664 ieee80211_bss_info_change_notify(sdata,
665 BSS_CHANGED_IDLE); 665 BSS_CHANGED_IDLE);
666 666
667 ieee80211_check_fast_xmit_iface(sdata);
668
667 return ret; 669 return ret;
668} 670}
669 671
@@ -1008,6 +1010,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
1008 if (WARN_ON(!chandef)) 1010 if (WARN_ON(!chandef))
1009 return -EINVAL; 1011 return -EINVAL;
1010 1012
1013 ieee80211_change_chanctx(local, new_ctx, chandef);
1014
1011 vif_chsw[0].vif = &sdata->vif; 1015 vif_chsw[0].vif = &sdata->vif;
1012 vif_chsw[0].old_ctx = &old_ctx->conf; 1016 vif_chsw[0].old_ctx = &old_ctx->conf;
1013 vif_chsw[0].new_ctx = &new_ctx->conf; 1017 vif_chsw[0].new_ctx = &new_ctx->conf;
@@ -1030,6 +1034,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
1030 if (sdata->vif.type == NL80211_IFTYPE_AP) 1034 if (sdata->vif.type == NL80211_IFTYPE_AP)
1031 __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); 1035 __ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
1032 1036
1037 ieee80211_check_fast_xmit_iface(sdata);
1038
1033 if (ieee80211_chanctx_refcount(local, old_ctx) == 0) 1039 if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
1034 ieee80211_free_chanctx(local, old_ctx); 1040 ieee80211_free_chanctx(local, old_ctx);
1035 1041
@@ -1079,6 +1085,8 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
1079 if (WARN_ON(!chandef)) 1085 if (WARN_ON(!chandef))
1080 return -EINVAL; 1086 return -EINVAL;
1081 1087
1088 ieee80211_change_chanctx(local, new_ctx, chandef);
1089
1082 list_del(&sdata->reserved_chanctx_list); 1090 list_del(&sdata->reserved_chanctx_list);
1083 sdata->reserved_chanctx = NULL; 1091 sdata->reserved_chanctx = NULL;
1084 1092
@@ -1376,6 +1384,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
1376 __ieee80211_vif_copy_chanctx_to_vlans(sdata, 1384 __ieee80211_vif_copy_chanctx_to_vlans(sdata,
1377 false); 1385 false);
1378 1386
1387 ieee80211_check_fast_xmit_iface(sdata);
1388
1379 sdata->radar_required = sdata->reserved_radar_required; 1389 sdata->radar_required = sdata->reserved_radar_required;
1380 1390
1381 if (sdata->vif.bss_conf.chandef.width != 1391 if (sdata->vif.bss_conf.chandef.width !=
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 23813ebb349c..3ea8b7de9633 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -1,4 +1,3 @@
1
2/* 1/*
3 * mac80211 debugfs for wireless PHYs 2 * mac80211 debugfs for wireless PHYs
4 * 3 *
@@ -92,62 +91,66 @@ static const struct file_operations reset_ops = {
 };
 #endif
 
+static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
+#define FLAG(F)	[IEEE80211_HW_##F] = #F
+	FLAG(HAS_RATE_CONTROL),
+	FLAG(RX_INCLUDES_FCS),
+	FLAG(HOST_BROADCAST_PS_BUFFERING),
+	FLAG(SIGNAL_UNSPEC),
+	FLAG(SIGNAL_DBM),
+	FLAG(NEED_DTIM_BEFORE_ASSOC),
+	FLAG(SPECTRUM_MGMT),
+	FLAG(AMPDU_AGGREGATION),
+	FLAG(SUPPORTS_PS),
+	FLAG(PS_NULLFUNC_STACK),
+	FLAG(SUPPORTS_DYNAMIC_PS),
+	FLAG(MFP_CAPABLE),
+	FLAG(WANT_MONITOR_VIF),
+	FLAG(NO_AUTO_VIF),
+	FLAG(SW_CRYPTO_CONTROL),
+	FLAG(SUPPORT_FAST_XMIT),
+	FLAG(REPORTS_TX_ACK_STATUS),
+	FLAG(CONNECTION_MONITOR),
+	FLAG(QUEUE_CONTROL),
+	FLAG(SUPPORTS_PER_STA_GTK),
+	FLAG(AP_LINK_PS),
+	FLAG(TX_AMPDU_SETUP_IN_HW),
+	FLAG(SUPPORTS_RC_TABLE),
+	FLAG(P2P_DEV_ADDR_FOR_INTF),
+	FLAG(TIMING_BEACON_ONLY),
+	FLAG(SUPPORTS_HT_CCK_RATES),
+	FLAG(CHANCTX_STA_CSA),
+	FLAG(SUPPORTS_CLONED_SKBS),
+	FLAG(SINGLE_SCAN_ON_ALL_BANDS),
+
+	/* keep last for the build bug below */
+	(void *)0x1
+#undef FLAG
+};
+
 static ssize_t hwflags_read(struct file *file, char __user *user_buf,
 			    size_t count, loff_t *ppos)
 {
 	struct ieee80211_local *local = file->private_data;
-	int mxln = 500;
+	size_t bufsz = 30 * NUM_IEEE80211_HW_FLAGS;
+	char *buf = kzalloc(bufsz, GFP_KERNEL);
+	char *pos = buf, *end = buf + bufsz - 1;
 	ssize_t rv;
-	char *buf = kzalloc(mxln, GFP_KERNEL);
-	int sf = 0; /* how many written so far */
+	int i;
 
 	if (!buf)
-		return 0;
+		return -ENOMEM;
 
-	sf += scnprintf(buf, mxln - sf, "0x%x\n", local->hw.flags);
-	if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
-		sf += scnprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n");
-	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
-		sf += scnprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n");
-	if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)
-		sf += scnprintf(buf + sf, mxln - sf,
-				"HOST_BCAST_PS_BUFFERING\n");
-	if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)
-		sf += scnprintf(buf + sf, mxln - sf,
-				"2GHZ_SHORT_SLOT_INCAPABLE\n");
-	if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)
-		sf += scnprintf(buf + sf, mxln - sf,
-				"2GHZ_SHORT_PREAMBLE_INCAPABLE\n");
-	if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
-		sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n");
-	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
-		sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n");
-	if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC)
-		sf += scnprintf(buf + sf, mxln - sf,
-				"NEED_DTIM_BEFORE_ASSOC\n");
-	if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)
-		sf += scnprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n");
-	if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)
-		sf += scnprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n");
-	if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS)
-		sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n");
-	if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
-		sf += scnprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n");
-	if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
-		sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n");
-	if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE)
-		sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n");
-	if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
-		sf += scnprintf(buf + sf, mxln - sf,
-				"REPORTS_TX_ACK_STATUS\n");
-	if (local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
-		sf += scnprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n");
-	if (local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK)
-		sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n");
-	if (local->hw.flags & IEEE80211_HW_AP_LINK_PS)
-		sf += scnprintf(buf + sf, mxln - sf, "AP_LINK_PS\n");
-	if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)
-		sf += scnprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n");
+	/* fail compilation if somebody adds or removes
+	 * a flag without updating the name array above
+	 */
+	BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1);
+
+	for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
+		if (test_bit(i, local->hw.flags))
+			pos += scnprintf(pos, end - pos, "%s",
+					 hw_flag_names[i]);
+	}
 
 	rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
 	kfree(buf);
@@ -219,8 +222,8 @@ static const struct file_operations stats_ ##name## _ops = { \
 	.llseek = generic_file_llseek,					\
 };
 
-#define DEBUGFS_STATS_ADD(name, field)					\
-	debugfs_create_u32(#name, 0400, statsd, (u32 *) &field);
+#define DEBUGFS_STATS_ADD(name)					\
+	debugfs_create_u32(#name, 0400, statsd, &local->name);
 #define DEBUGFS_DEVSTATS_ADD(name)					\
 	debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops);
 
@@ -255,53 +258,31 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	if (!statsd)
 		return;
 
-	DEBUGFS_STATS_ADD(transmitted_fragment_count,
-			  local->dot11TransmittedFragmentCount);
-	DEBUGFS_STATS_ADD(multicast_transmitted_frame_count,
-			  local->dot11MulticastTransmittedFrameCount);
-	DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount);
-	DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount);
-	DEBUGFS_STATS_ADD(multiple_retry_count,
-			  local->dot11MultipleRetryCount);
-	DEBUGFS_STATS_ADD(frame_duplicate_count,
-			  local->dot11FrameDuplicateCount);
-	DEBUGFS_STATS_ADD(received_fragment_count,
-			  local->dot11ReceivedFragmentCount);
-	DEBUGFS_STATS_ADD(multicast_received_frame_count,
-			  local->dot11MulticastReceivedFrameCount);
-	DEBUGFS_STATS_ADD(transmitted_frame_count,
-			  local->dot11TransmittedFrameCount);
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-	DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop);
-	DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_fragment,
-			  local->tx_handlers_drop_fragment);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_wep,
-			  local->tx_handlers_drop_wep);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc,
-			  local->tx_handlers_drop_not_assoc);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port,
-			  local->tx_handlers_drop_unauth_port);
-	DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop);
-	DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc,
-			  local->rx_handlers_drop_nullfunc);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_defrag,
-			  local->rx_handlers_drop_defrag);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_short,
-			  local->rx_handlers_drop_short);
-	DEBUGFS_STATS_ADD(tx_expand_skb_head,
-			  local->tx_expand_skb_head);
-	DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned,
-			  local->tx_expand_skb_head_cloned);
-	DEBUGFS_STATS_ADD(rx_expand_skb_head,
-			  local->rx_expand_skb_head);
-	DEBUGFS_STATS_ADD(rx_expand_skb_head2,
-			  local->rx_expand_skb_head2);
-	DEBUGFS_STATS_ADD(rx_handlers_fragments,
-			  local->rx_handlers_fragments);
-	DEBUGFS_STATS_ADD(tx_status_drop,
-			  local->tx_status_drop);
+	DEBUGFS_STATS_ADD(dot11TransmittedFragmentCount);
+	DEBUGFS_STATS_ADD(dot11MulticastTransmittedFrameCount);
+	DEBUGFS_STATS_ADD(dot11FailedCount);
+	DEBUGFS_STATS_ADD(dot11RetryCount);
+	DEBUGFS_STATS_ADD(dot11MultipleRetryCount);
+	DEBUGFS_STATS_ADD(dot11FrameDuplicateCount);
+	DEBUGFS_STATS_ADD(dot11ReceivedFragmentCount);
+	DEBUGFS_STATS_ADD(dot11MulticastReceivedFrameCount);
+	DEBUGFS_STATS_ADD(dot11TransmittedFrameCount);
+	DEBUGFS_STATS_ADD(tx_handlers_drop);
+	DEBUGFS_STATS_ADD(tx_handlers_queued);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_wep);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port);
+	DEBUGFS_STATS_ADD(rx_handlers_drop);
+	DEBUGFS_STATS_ADD(rx_handlers_queued);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_defrag);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_short);
+	DEBUGFS_STATS_ADD(tx_expand_skb_head);
+	DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned);
+	DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag);
+	DEBUGFS_STATS_ADD(rx_handlers_fragments);
+	DEBUGFS_STATS_ADD(tx_status_drop);
 #endif
 	DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount);
 	DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount);
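The hunk above replaces a hand-maintained chain of `if (flags & ...) scnprintf(...)` tests with a table of flag names indexed by flag number, plus a one-past-the-end sentinel that breaks the build when a flag is added without a name. A minimal standalone sketch of the same pattern (the flag enum, names and dump_flags() are illustrative stand-ins, not the mac80211 definitions; the kernel checks the sentinel at compile time with BUILD_BUG_ON, this sketch checks it with assert()):

/* flags.c - table-driven flag dump with a sentinel-guarded name array */
#include <assert.h>
#include <stdio.h>

enum hw_flag { FLAG_A, FLAG_B, FLAG_C, NUM_FLAGS };	/* hypothetical */

#define SENTINEL ((const char *)0x1)

static const char *flag_names[NUM_FLAGS + 1] = {
#define FLAG(F) [FLAG_##F] = #F
	FLAG(A),
	FLAG(B),
	FLAG(C),
	SENTINEL	/* lands one past the last named flag */
#undef FLAG
};

static void dump_flags(unsigned long flags)
{
	int i;

	/* fires if a flag was added to the enum without a FLAG() entry */
	assert(flag_names[NUM_FLAGS] == SENTINEL);

	for (i = 0; i < NUM_FLAGS; i++)
		if (flags & (1UL << i))
			printf("%s\n", flag_names[i]);
}

int main(void)
{
	dump_flags((1UL << FLAG_A) | (1UL << FLAG_C));	/* prints A, C */
	return 0;
}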
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 71ac1b5f4da5..e82bf1e9d7a8 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -95,28 +95,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_CCMP_256:
-		pn = atomic64_read(&key->u.ccmp.tx_pn);
-		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
-				(u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
-				(u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
-		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
-		pn = atomic64_read(&key->u.aes_cmac.tx_pn);
-		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
-				(u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
-				(u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
-		break;
 	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
 	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
-		pn = atomic64_read(&key->u.aes_gmac.tx_pn);
-		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
-				(u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
-				(u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
-		break;
 	case WLAN_CIPHER_SUITE_GCMP:
 	case WLAN_CIPHER_SUITE_GCMP_256:
-		pn = atomic64_read(&key->u.gcmp.tx_pn);
+		pn = atomic64_read(&key->conf.tx_pn);
 		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
 				(u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
 				(u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
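With the per-cipher u.*.tx_pn fields collapsed into a single key->conf.tx_pn, every robust cipher now falls through to one read-and-format path. For reference, splitting a 48-bit packet number into big-endian hex bytes, as this debugfs file does, looks like the following in isolation (format_pn is a hypothetical helper, not a mac80211 function):

/* pn.c - format a 48-bit packet number big-endian, byte by byte */
#include <stdint.h>
#include <stdio.h>

static int format_pn(char *buf, size_t len, uint64_t pn)
{
	return snprintf(buf, len, "%02x%02x%02x%02x%02x%02x\n",
			(uint8_t)(pn >> 40), (uint8_t)(pn >> 32),
			(uint8_t)(pn >> 24), (uint8_t)(pn >> 16),
			(uint8_t)(pn >> 8), (uint8_t)pn);
}

int main(void)
{
	char buf[16];

	format_pn(buf, sizeof(buf), 0x0102030405ULL);
	printf("%s", buf);	/* prints 000102030405 */
	return 0;
}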
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 252859e90e8a..06d52935036d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -29,8 +29,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \
 		       format_string, sta->field);			\
 }
 #define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
-#define STA_READ_U(name, field) STA_READ(name, field, "%u\n")
-#define STA_READ_S(name, field) STA_READ(name, field, "%s\n")
 
 #define STA_OPS(name)						\
 static const struct file_operations sta_ ##name## _ops = {	\
@@ -52,10 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \
 	STA_OPS(name)
 
 STA_FILE(aid, sta.aid, D);
-STA_FILE(dev, sdata->name, S);
-STA_FILE(last_signal, last_signal, D);
 STA_FILE(last_ack_signal, last_ack_signal, D);
-STA_FILE(beacon_loss_count, beacon_loss_count, D);
 
 static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 			      size_t count, loff_t *ppos)
@@ -101,40 +96,6 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file,
 }
 STA_OPS(num_ps_buf_frames);
 
-static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
-				    size_t count, loff_t *ppos)
-{
-	struct sta_info *sta = file->private_data;
-	return mac80211_format_buffer(userbuf, count, ppos, "%d\n",
-				      jiffies_to_msecs(jiffies - sta->last_rx));
-}
-STA_OPS(inactive_ms);
-
-
-static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf,
-				       size_t count, loff_t *ppos)
-{
-	struct sta_info *sta = file->private_data;
-	struct timespec uptime;
-	struct tm result;
-	long connected_time_secs;
-	char buf[100];
-	int res;
-	ktime_get_ts(&uptime);
-	connected_time_secs = uptime.tv_sec - sta->last_connected;
-	time_to_tm(connected_time_secs, 0, &result);
-	result.tm_year -= 70;
-	result.tm_mday -= 1;
-	res = scnprintf(buf, sizeof(buf),
-		"years - %ld\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n",
-		result.tm_year, result.tm_mon, result.tm_mday,
-		result.tm_hour, result.tm_min, result.tm_sec);
-	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
-}
-STA_OPS(connected_time);
-
-
-
 static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
 				      size_t count, loff_t *ppos)
 {
@@ -359,37 +320,6 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
 }
 STA_OPS(vht_capa);
 
-static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,
-					size_t count, loff_t *ppos)
-{
-	struct sta_info *sta = file->private_data;
-	struct rate_info rinfo;
-	u16 rate;
-	sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo);
-	rate = cfg80211_calculate_bitrate(&rinfo);
-
-	return mac80211_format_buffer(userbuf, count, ppos,
-				      "%d.%d MBit/s\n",
-				      rate/10, rate%10);
-}
-STA_OPS(current_tx_rate);
-
-static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf,
-				     size_t count, loff_t *ppos)
-{
-	struct sta_info *sta = file->private_data;
-	struct rate_info rinfo;
-	u16 rate;
-
-	sta_set_rate_info_rx(sta, &rinfo);
-
-	rate = cfg80211_calculate_bitrate(&rinfo);
-
-	return mac80211_format_buffer(userbuf, count, ppos,
-				      "%d.%d MBit/s\n",
-				      rate/10, rate%10);
-}
-STA_OPS(last_rx_rate);
 
 #define DEBUGFS_ADD(name) \
 	debugfs_create_file(#name, 0400, \
@@ -432,30 +362,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 
 	DEBUGFS_ADD(flags);
 	DEBUGFS_ADD(num_ps_buf_frames);
-	DEBUGFS_ADD(inactive_ms);
-	DEBUGFS_ADD(connected_time);
 	DEBUGFS_ADD(last_seq_ctrl);
 	DEBUGFS_ADD(agg_status);
-	DEBUGFS_ADD(dev);
-	DEBUGFS_ADD(last_signal);
-	DEBUGFS_ADD(beacon_loss_count);
 	DEBUGFS_ADD(ht_capa);
 	DEBUGFS_ADD(vht_capa);
 	DEBUGFS_ADD(last_ack_signal);
-	DEBUGFS_ADD(current_tx_rate);
-	DEBUGFS_ADD(last_rx_rate);
 
-	DEBUGFS_ADD_COUNTER(rx_packets, rx_packets);
-	DEBUGFS_ADD_COUNTER(tx_packets, tx_packets);
-	DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes);
-	DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes);
 	DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates);
 	DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments);
-	DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped);
-	DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments);
 	DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count);
-	DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed);
-	DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
 
 	if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
 		debugfs_create_x32("driver_buffered_tids", 0400,
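The surviving STA_FILE/STA_OPS users rely on token pasting to stamp out one read handler per struct field; the unused STA_READ_U/STA_READ_S variants are dropped above. A userspace reduction of the pattern (struct and field names are illustrative):

/* sta_read.c - token-pasted per-field read handlers */
#include <stdio.h>

struct sta { int aid; int last_ack_signal; };	/* illustrative */

#define STA_READ(name, field, fmt)			\
static void sta_##name##_read(const struct sta *sta)	\
{							\
	printf(fmt, sta->field);			\
}

STA_READ(aid, aid, "%d\n")
STA_READ(last_ack_signal, last_ack_signal, "%d\n")

int main(void)
{
	struct sta s = { .aid = 1, .last_ack_signal = -42 };

	sta_aid_read(&s);		/* 1 */
	sta_last_ack_signal_read(&s);	/* -42 */
	return 0;
}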
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 26e1ca8a474a..32a2e707e222 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -146,7 +146,7 @@ static inline int drv_add_interface(struct ieee80211_local *local,
 
 	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
 		    (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
-		     !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) &&
+		     !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
 		     !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
 		return -EINVAL;
 
@@ -417,12 +417,13 @@ static inline int drv_get_stats(struct ieee80211_local *local,
 	return ret;
 }
 
-static inline void drv_get_tkip_seq(struct ieee80211_local *local,
-				    u8 hw_key_idx, u32 *iv32, u16 *iv16)
+static inline void drv_get_key_seq(struct ieee80211_local *local,
+				   struct ieee80211_key *key,
+				   struct ieee80211_key_seq *seq)
 {
-	if (local->ops->get_tkip_seq)
-		local->ops->get_tkip_seq(&local->hw, hw_key_idx, iv32, iv16);
-	trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16);
+	if (local->ops->get_key_seq)
+		local->ops->get_key_seq(&local->hw, &key->conf, seq);
+	trace_drv_get_key_seq(local, &key->conf);
 }
 
 static inline int drv_set_frag_threshold(struct ieee80211_local *local,
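drv_get_key_seq() above follows the usual driver-ops convention: the callback is optional, so the wrapper checks for NULL before dispatching, and the core traces the call either way. A reduced model of that optional-callback wrapper (types and names are stand-ins, not the mac80211 API):

/* drv_ops.c - optional driver callback behind a core-side wrapper */
#include <stdio.h>

struct key_seq { unsigned char pn[6]; };	/* stand-in type */

struct driver_ops {
	/* NULL when the driver cannot report a hardware counter */
	void (*get_key_seq)(int key_idx, struct key_seq *seq);
};

static void drv_get_key_seq(const struct driver_ops *ops,
			    int key_idx, struct key_seq *seq)
{
	if (ops->get_key_seq)		/* dispatch only if implemented */
		ops->get_key_seq(key_idx, seq);
	/* mac80211 additionally emits a tracepoint here, regardless */
}

static void demo_get_key_seq(int key_idx, struct key_seq *seq)
{
	seq->pn[5] = (unsigned char)key_idx;
}

int main(void)
{
	struct driver_ops with = { .get_key_seq = demo_get_key_seq };
	struct driver_ops without = { 0 };
	struct key_seq seq = { { 0 } };

	drv_get_key_seq(&with, 3, &seq);	/* fills seq */
	drv_get_key_seq(&without, 3, &seq);	/* silently a no-op */
	printf("%u\n", seq.pn[5]);		/* 3 */
	return 0;
}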
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index 52bcea6ad9e8..188faab11c24 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -38,7 +38,7 @@ static void ieee80211_get_ringparam(struct net_device *dev,
 static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
 	"rx_packets", "rx_bytes",
 	"rx_duplicates", "rx_fragments", "rx_dropped",
-	"tx_packets", "tx_bytes", "tx_fragments",
+	"tx_packets", "tx_bytes",
 	"tx_filtered", "tx_retry_failed", "tx_retries",
 	"beacon_loss", "sta_state", "txrate", "rxrate", "signal",
 	"channel", "noise", "ch_time", "ch_time_busy",
@@ -87,7 +87,6 @@ static void ieee80211_get_stats(struct net_device *dev,
 									\
 			data[i++] += sinfo.tx_packets;			\
 			data[i++] += sinfo.tx_bytes;			\
-			data[i++] += sta->tx_fragments;			\
 			data[i++] += sta->tx_filtered_count;		\
 			data[i++] += sta->tx_retry_failed;		\
 			data[i++] += sta->tx_retry_count;		\
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index bfef1b215050..7f72bc9bae2e 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -146,6 +146,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
 					  csa_settings->chandef.chan->center_freq);
 		presp->csa_counter_offsets[0] = (pos - presp->head);
 		*pos++ = csa_settings->count;
+		presp->csa_current_counter = csa_settings->count;
 	}
 
 	/* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */
@@ -1031,8 +1032,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	if (sta && elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS)
+	if (sta && !sta->sta.wme &&
+	    elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS) {
 		sta->sta.wme = true;
+		ieee80211_check_fast_xmit(sta);
+	}
 
 	if (sta && elems->ht_operation && elems->ht_cap_elem &&
 	    sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c0a9187bc3a9..b12f61507f9f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -181,8 +181,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
 
 /**
  * enum ieee80211_packet_rx_flags - packet RX flags
- * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed
- *	(incl. multicast frames)
  * @IEEE80211_RX_FRAGMENTED: fragmented frame
  * @IEEE80211_RX_AMSDU: a-MSDU packet
  * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
@@ -192,7 +190,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
  * @rx_flags field of &struct ieee80211_rx_status.
  */
 enum ieee80211_packet_rx_flags {
-	IEEE80211_RX_RA_MATCH			= BIT(1),
 	IEEE80211_RX_FRAGMENTED			= BIT(2),
 	IEEE80211_RX_AMSDU			= BIT(3),
 	IEEE80211_RX_MALFORMED_ACTION_FRM	= BIT(4),
@@ -722,7 +719,6 @@ struct ieee80211_if_mesh {
  * enum ieee80211_sub_if_data_flags - virtual interface flags
  *
  * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets
- * @IEEE80211_SDATA_PROMISC: interface is promisc
  * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode
  * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between
  *	associated stations and deliver multicast frames both
@@ -732,7 +728,6 @@ struct ieee80211_if_mesh {
  */
 enum ieee80211_sub_if_data_flags {
 	IEEE80211_SDATA_ALLMULTI		= BIT(0),
-	IEEE80211_SDATA_PROMISC			= BIT(1),
 	IEEE80211_SDATA_OPERATING_GMODE		= BIT(2),
 	IEEE80211_SDATA_DONT_BRIDGE_PACKETS	= BIT(3),
 	IEEE80211_SDATA_DISCONNECT_RESUME	= BIT(4),
@@ -1040,7 +1035,6 @@ enum queue_stop_reason {
 
 #ifdef CONFIG_MAC80211_LEDS
 struct tpt_led_trigger {
-	struct led_trigger trig;
 	char name[32];
 	const struct ieee80211_tpt_blink *blink_table;
 	unsigned int blink_table_len;
@@ -1208,8 +1202,8 @@ struct ieee80211_local {
 
 	atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
 
-	/* number of interfaces with corresponding IFF_ flags */
-	atomic_t iff_allmultis, iff_promiscs;
+	/* number of interfaces with allmulti RX */
+	atomic_t iff_allmultis;
 
 	struct rate_control_ref *rate_ctrl;
 
@@ -1261,6 +1255,15 @@ struct ieee80211_local {
 	struct list_head chanctx_list;
 	struct mutex chanctx_mtx;
 
+#ifdef CONFIG_MAC80211_LEDS
+	struct led_trigger tx_led, rx_led, assoc_led, radio_led;
+	struct led_trigger tpt_led;
+	atomic_t tx_led_active, rx_led_active, assoc_led_active;
+	atomic_t radio_led_active, tpt_led_active;
+	struct tpt_led_trigger *tpt_led_trigger;
+#endif
+
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
 	/* SNMP counters */
 	/* dot11CountersTable */
 	u32 dot11TransmittedFragmentCount;
@@ -1273,18 +1276,9 @@ struct ieee80211_local {
 	u32 dot11MulticastReceivedFrameCount;
 	u32 dot11TransmittedFrameCount;
 
-#ifdef CONFIG_MAC80211_LEDS
-	struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
-	struct tpt_led_trigger *tpt_led_trigger;
-	char tx_led_name[32], rx_led_name[32],
-	     assoc_led_name[32], radio_led_name[32];
-#endif
-
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
 	/* TX/RX handler statistics */
 	unsigned int tx_handlers_drop;
 	unsigned int tx_handlers_queued;
-	unsigned int tx_handlers_drop_fragment;
 	unsigned int tx_handlers_drop_wep;
 	unsigned int tx_handlers_drop_not_assoc;
 	unsigned int tx_handlers_drop_unauth_port;
@@ -1295,8 +1289,7 @@ struct ieee80211_local {
 	unsigned int rx_handlers_drop_short;
 	unsigned int tx_expand_skb_head;
 	unsigned int tx_expand_skb_head_cloned;
-	unsigned int rx_expand_skb_head;
-	unsigned int rx_expand_skb_head2;
+	unsigned int rx_expand_skb_head_defrag;
 	unsigned int rx_handlers_fragments;
 	unsigned int tx_status_drop;
 #define I802_DEBUG_INC(c) (c)++
@@ -1648,6 +1641,11 @@ struct sk_buff *
 ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
 			      struct sk_buff *skb, u32 info_flags);
 
+void ieee80211_check_fast_xmit(struct sta_info *sta);
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
+void ieee80211_clear_fast_xmit(struct sta_info *sta);
+
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap);
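The flag checks throughout this series change from `local->hw.flags & IEEE80211_HW_X` to `ieee80211_hw_check(&local->hw, X)` because the flag word became a multi-word bitmap that a plain AND can no longer test. A reduced model of such an accessor (flag names and hw_check are illustrative, not the mac80211 definitions):

/* hwflags.c - multi-word flag bitmap with a token-pasting accessor */
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG	(8 * (int)sizeof(long))
#define BITMAP_WORDS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* hypothetical flag list; more than 32 flags forces the bitmap */
enum hw_flag { HW_HAS_RATE_CONTROL, HW_QUEUE_CONTROL, NUM_HW_FLAGS = 40 };

struct hw { unsigned long flags[BITMAP_WORDS(NUM_HW_FLAGS)]; };

static bool hw_test_bit(int nr, const unsigned long *addr)
{
	return (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1UL;
}

static void hw_set_bit(int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

/* modeled on ieee80211_hw_check(): callers write the short flag name */
#define hw_check(hw, flg) hw_test_bit(HW_##flg, (hw)->flags)

int main(void)
{
	struct hw hw = { { 0 } };

	hw_set_bit(HW_QUEUE_CONTROL, hw.flags);
	printf("%d %d\n", hw_check(&hw, HAS_RATE_CONTROL),
	       hw_check(&hw, QUEUE_CONTROL));	/* 0 1 */
	return 0;
}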
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 84cef600c573..ed1edac14372 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -338,7 +338,7 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
 	if ((iftype != NL80211_IFTYPE_AP &&
 	     iftype != NL80211_IFTYPE_P2P_GO &&
 	     iftype != NL80211_IFTYPE_MESH_POINT) ||
-	    !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) {
+	    !ieee80211_hw_check(&sdata->local->hw, QUEUE_CONTROL)) {
 		sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
 		return 0;
 	}
@@ -378,7 +378,7 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
 	int i;
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
-		if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+		if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
 			sdata->vif.hw_queue[i] = IEEE80211_INVAL_HW_QUEUE;
 		else if (local->hw.queues >= IEEE80211_NUM_ACS)
 			sdata->vif.hw_queue[i] = i;
@@ -393,7 +393,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 	struct ieee80211_sub_if_data *sdata;
 	int ret;
 
-	if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF))
+	if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
 		return 0;
 
 	ASSERT_RTNL();
@@ -454,7 +454,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 
-	if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF))
+	if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
 		return;
 
 	ASSERT_RTNL();
@@ -703,9 +703,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
 		atomic_inc(&local->iff_allmultis);
 
-	if (sdata->flags & IEEE80211_SDATA_PROMISC)
-		atomic_inc(&local->iff_promiscs);
-
 	if (coming_up)
 		local->open_count++;
 
@@ -835,13 +832,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		     ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
 		      (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)));
 
-	/* don't count this interface for promisc/allmulti while it is down */
+	/* don't count this interface for allmulti while it is down */
 	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
 		atomic_dec(&local->iff_allmultis);
 
-	if (sdata->flags & IEEE80211_SDATA_PROMISC)
-		atomic_dec(&local->iff_promiscs);
-
 	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		local->fif_pspoll--;
 		local->fif_probe_req--;
@@ -1055,12 +1049,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
-	int allmulti, promisc, sdata_allmulti, sdata_promisc;
+	int allmulti, sdata_allmulti;
 
 	allmulti = !!(dev->flags & IFF_ALLMULTI);
-	promisc = !!(dev->flags & IFF_PROMISC);
 	sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI);
-	sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC);
 
 	if (allmulti != sdata_allmulti) {
 		if (dev->flags & IFF_ALLMULTI)
@@ -1070,13 +1062,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
 		sdata->flags ^= IEEE80211_SDATA_ALLMULTI;
 	}
 
-	if (promisc != sdata_promisc) {
-		if (dev->flags & IFF_PROMISC)
-			atomic_inc(&local->iff_promiscs);
-		else
-			atomic_dec(&local->iff_promiscs);
-		sdata->flags ^= IEEE80211_SDATA_PROMISC;
-	}
 	spin_lock_bh(&local->filter_lock);
 	__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
@@ -1117,6 +1102,35 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
 }
 
+static struct rtnl_link_stats64 *
+ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_sw_netstats *tstats;
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		tstats = per_cpu_ptr(dev->tstats, i);
+
+		do {
+			start = u64_stats_fetch_begin_irq(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->tx_packets += tx_packets;
+		stats->rx_bytes += rx_bytes;
+		stats->tx_bytes += tx_bytes;
+	}
+
+	return stats;
+}
+
 static const struct net_device_ops ieee80211_dataif_ops = {
 	.ndo_open		= ieee80211_open,
 	.ndo_stop		= ieee80211_stop,
@@ -1126,6 +1140,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 	.ndo_change_mtu 	= ieee80211_change_mtu,
 	.ndo_set_mac_address 	= ieee80211_change_mac,
 	.ndo_select_queue	= ieee80211_netdev_select_queue,
+	.ndo_get_stats64	= ieee80211_get_stats64,
 };
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
@@ -1159,14 +1174,21 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 	.ndo_change_mtu 	= ieee80211_change_mtu,
 	.ndo_set_mac_address 	= ieee80211_change_mac,
 	.ndo_select_queue	= ieee80211_monitor_select_queue,
+	.ndo_get_stats64	= ieee80211_get_stats64,
 };
 
+static void ieee80211_if_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
 static void ieee80211_if_setup(struct net_device *dev)
 {
 	ether_setup(dev);
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops = &ieee80211_dataif_ops;
-	dev->destructor = free_netdev;
+	dev->destructor = ieee80211_if_free;
 }
 
 static void ieee80211_iface_work(struct work_struct *work)
@@ -1564,7 +1586,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
 		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_P2P_GO:
-		if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) {
+		if (ieee80211_hw_check(&local->hw, P2P_DEV_ADDR_FOR_INTF)) {
 			list_for_each_entry(sdata, &local->interfaces, list) {
 				if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
 					continue;
@@ -1707,6 +1729,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		return -ENOMEM;
 	dev_net_set(ndev, wiphy_net(local->hw.wiphy));
 
+	ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!ndev->tstats) {
+		free_netdev(ndev);
+		return -ENOMEM;
+	}
+
 	ndev->needed_headroom = local->tx_headroom +
 				4*6 /* four MAC addresses */
 				+ 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
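The new ieee80211_get_stats64() sums counters that each CPU updates locklessly under a sequence counter: the reader snapshots the values and retries whenever the counter changed underneath it. A single-threaded userspace model of that retry protocol (the fetch_begin/fetch_retry helpers stand in for the kernel's u64_stats API):

/* pcpu_stats.c - seqcount-style snapshot of per-CPU counters */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct pcpu_stats {
	uint64_t rx_packets, rx_bytes;
	unsigned int seq;	/* odd while a writer is mid-update */
};

static struct pcpu_stats per_cpu[NR_CPUS];

static unsigned int fetch_begin(const struct pcpu_stats *s)
{
	unsigned int seq;

	do {
		seq = s->seq;	/* real code adds read barriers */
	} while (seq & 1);	/* wait out an in-progress writer */
	return seq;
}

static int fetch_retry(const struct pcpu_stats *s, unsigned int seq)
{
	return s->seq != seq;	/* writer ran: discard the torn snapshot */
}

static void get_stats64(uint64_t *rx_packets, uint64_t *rx_bytes)
{
	*rx_packets = *rx_bytes = 0;

	for (int i = 0; i < NR_CPUS; i++) {
		const struct pcpu_stats *s = &per_cpu[i];
		uint64_t p, b;
		unsigned int start;

		do {
			start = fetch_begin(s);
			p = s->rx_packets;
			b = s->rx_bytes;
		} while (fetch_retry(s, start));

		*rx_packets += p;	/* sum the per-CPU counters */
		*rx_bytes += b;
	}
}

int main(void)
{
	uint64_t p, b;

	per_cpu[0].rx_packets = 2;
	per_cpu[3].rx_packets = 3;
	get_stats64(&p, &b);
	printf("%llu\n", (unsigned long long)p);	/* 5 */
	return 0;
}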
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a907f2d5c12d..b22df3a79a41 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
 	if (sdata->vif.type != NL80211_IFTYPE_AP)
 		return;
 
-	mutex_lock(&sdata->local->mtx);
+	/* crypto_tx_tailroom_needed_cnt is protected by this */
+	assert_key_lock(sdata->local);
+
+	rcu_read_lock();
 
-	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+	list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list)
 		vlan->crypto_tx_tailroom_needed_cnt += delta;
 
-	mutex_unlock(&sdata->local->mtx);
+	rcu_read_unlock();
 }
 
 static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
@@ -95,6 +98,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
 	 * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net
 	 */
 
+	assert_key_lock(sdata->local);
+
 	update_vlan_tailroom_need_count(sdata, 1);
 
 	if (!sdata->crypto_tx_tailroom_needed_cnt++) {
@@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
 static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
 					 int delta)
 {
+	assert_key_lock(sdata->local);
+
 	WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta);
 
 	update_vlan_tailroom_need_count(sdata, -delta);
@@ -147,7 +154,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 	 * is supported; if not, return.
 	 */
 	if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) &&
-	    !(key->local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK))
+	    !ieee80211_hw_check(&key->local->hw, SUPPORTS_PER_STA_GTK))
 		goto out_unsupported;
 
 	if (sta && !sta->uploaded)
@@ -201,7 +208,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 	/* all of these we can do in software - if driver can */
 	if (ret == 1)
 		return 0;
-	if (key->local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL)
+	if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
 		return -EINVAL;
 	return 0;
 default:
@@ -256,6 +263,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
 
 	if (uni) {
 		rcu_assign_pointer(sdata->default_unicast_key, key);
+		ieee80211_check_fast_xmit_iface(sdata);
 		drv_set_default_unicast_key(sdata->local, sdata, idx);
 	}
 
@@ -325,6 +333,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 	if (pairwise) {
 		rcu_assign_pointer(sta->ptk[idx], new);
 		sta->ptk_idx = idx;
+		ieee80211_check_fast_xmit(sta);
 	} else {
 		rcu_assign_pointer(sta->gtk[idx], new);
 		sta->gtk_idx = idx;
@@ -510,15 +519,17 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		break;
 	default:
 		if (cs) {
-			size_t len = (seq_len > MAX_PN_LEN) ?
-				MAX_PN_LEN : seq_len;
+			if (seq_len && seq_len != cs->pn_len) {
+				kfree(key);
+				return ERR_PTR(-EINVAL);
+			}
 
 			key->conf.iv_len = cs->hdr_len;
 			key->conf.icv_len = cs->mic_len;
 			for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
-				for (j = 0; j < len; j++)
+				for (j = 0; j < seq_len; j++)
 					key->u.gen.rx_pn[i][j] =
-						seq[len - j - 1];
+						seq[seq_len - j - 1];
 			key->flags |= KEY_FLAG_CIPHER_SCHEME;
 		}
 	}
@@ -892,27 +903,19 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf,
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_CCMP_256:
-		pn64 = atomic64_read(&key->u.ccmp.tx_pn);
-		seq->ccmp.pn[5] = pn64;
-		seq->ccmp.pn[4] = pn64 >> 8;
-		seq->ccmp.pn[3] = pn64 >> 16;
-		seq->ccmp.pn[2] = pn64 >> 24;
-		seq->ccmp.pn[1] = pn64 >> 32;
-		seq->ccmp.pn[0] = pn64 >> 40;
-		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
-		pn64 = atomic64_read(&key->u.aes_cmac.tx_pn);
-		seq->ccmp.pn[5] = pn64;
-		seq->ccmp.pn[4] = pn64 >> 8;
-		seq->ccmp.pn[3] = pn64 >> 16;
-		seq->ccmp.pn[2] = pn64 >> 24;
-		seq->ccmp.pn[1] = pn64 >> 32;
-		seq->ccmp.pn[0] = pn64 >> 40;
-		break;
+		BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+			     offsetof(typeof(*seq), aes_cmac));
 	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
 	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
-		pn64 = atomic64_read(&key->u.aes_gmac.tx_pn);
+		BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+			     offsetof(typeof(*seq), aes_gmac));
+	case WLAN_CIPHER_SUITE_GCMP:
+	case WLAN_CIPHER_SUITE_GCMP_256:
+		BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+			     offsetof(typeof(*seq), gcmp));
+		pn64 = atomic64_read(&key->conf.tx_pn);
 		seq->ccmp.pn[5] = pn64;
 		seq->ccmp.pn[4] = pn64 >> 8;
 		seq->ccmp.pn[3] = pn64 >> 16;
@@ -920,16 +923,6 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf,
 		seq->ccmp.pn[1] = pn64 >> 32;
 		seq->ccmp.pn[0] = pn64 >> 40;
 		break;
-	case WLAN_CIPHER_SUITE_GCMP:
-	case WLAN_CIPHER_SUITE_GCMP_256:
-		pn64 = atomic64_read(&key->u.gcmp.tx_pn);
-		seq->gcmp.pn[5] = pn64;
-		seq->gcmp.pn[4] = pn64 >> 8;
-		seq->gcmp.pn[3] = pn64 >> 16;
-		seq->gcmp.pn[2] = pn64 >> 24;
-		seq->gcmp.pn[1] = pn64 >> 32;
-		seq->gcmp.pn[0] = pn64 >> 40;
-		break;
 	default:
 		WARN_ON(1);
 	}
@@ -1004,43 +997,25 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_CCMP_256:
-		pn64 = (u64)seq->ccmp.pn[5] |
-		       ((u64)seq->ccmp.pn[4] << 8) |
-		       ((u64)seq->ccmp.pn[3] << 16) |
-		       ((u64)seq->ccmp.pn[2] << 24) |
-		       ((u64)seq->ccmp.pn[1] << 32) |
-		       ((u64)seq->ccmp.pn[0] << 40);
-		atomic64_set(&key->u.ccmp.tx_pn, pn64);
-		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
-		pn64 = (u64)seq->aes_cmac.pn[5] |
-		       ((u64)seq->aes_cmac.pn[4] << 8) |
-		       ((u64)seq->aes_cmac.pn[3] << 16) |
-		       ((u64)seq->aes_cmac.pn[2] << 24) |
-		       ((u64)seq->aes_cmac.pn[1] << 32) |
-		       ((u64)seq->aes_cmac.pn[0] << 40);
-		atomic64_set(&key->u.aes_cmac.tx_pn, pn64);
-		break;
+		BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+			     offsetof(typeof(*seq), aes_cmac));
 	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
 	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
-		pn64 = (u64)seq->aes_gmac.pn[5] |
-		       ((u64)seq->aes_gmac.pn[4] << 8) |
-		       ((u64)seq->aes_gmac.pn[3] << 16) |
-		       ((u64)seq->aes_gmac.pn[2] << 24) |
-		       ((u64)seq->aes_gmac.pn[1] << 32) |
-		       ((u64)seq->aes_gmac.pn[0] << 40);
-		atomic64_set(&key->u.aes_gmac.tx_pn, pn64);
-		break;
+		BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+			     offsetof(typeof(*seq), aes_gmac));
 	case WLAN_CIPHER_SUITE_GCMP:
 	case WLAN_CIPHER_SUITE_GCMP_256:
-		pn64 = (u64)seq->gcmp.pn[5] |
-		       ((u64)seq->gcmp.pn[4] << 8) |
-		       ((u64)seq->gcmp.pn[3] << 16) |
-		       ((u64)seq->gcmp.pn[2] << 24) |
-		       ((u64)seq->gcmp.pn[1] << 32) |
-		       ((u64)seq->gcmp.pn[0] << 40);
-		atomic64_set(&key->u.gcmp.tx_pn, pn64);
+		BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+			     offsetof(typeof(*seq), gcmp));
+		pn64 = (u64)seq->ccmp.pn[5] |
+		       ((u64)seq->ccmp.pn[4] << 8) |
+		       ((u64)seq->ccmp.pn[3] << 16) |
+		       ((u64)seq->ccmp.pn[2] << 24) |
+		       ((u64)seq->ccmp.pn[1] << 32) |
+		       ((u64)seq->ccmp.pn[0] << 40);
+		atomic64_set(&key->conf.tx_pn, pn64);
 		break;
 	default:
 		WARN_ON(1);
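The fall-through cases above are only valid because every per-cipher pn array starts at the same offset inside the sequence union, and the BUILD_BUG_ON(offsetof(...) != offsetof(...)) lines make that a compile-time guarantee. A standalone sketch of the technique (the union is a reduced model of struct ieee80211_key_seq, not its real definition):

/* keyseq.c - layout check that makes fall-through aliasing safe */
#include <stddef.h>
#include <stdio.h>

union key_seq {
	struct { unsigned char pn[6]; } ccmp;
	struct { unsigned char pn[6]; } aes_cmac;
	struct { unsigned char pn[6]; } gcmp;
};

#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

static void print_first_pn_byte(const union key_seq *seq)
{
	/* fails to compile if any member's pn drifts to another offset */
	BUILD_BUG_ON(offsetof(union key_seq, ccmp) !=
		     offsetof(union key_seq, aes_cmac));
	BUILD_BUG_ON(offsetof(union key_seq, ccmp) !=
		     offsetof(union key_seq, gcmp));

	/* so reading any cipher's PN through .ccmp is well-defined */
	printf("%02x\n", seq->ccmp.pn[0]);
}

int main(void)
{
	union key_seq seq = { .gcmp = { { 0xab, 0, 0, 0, 0, 0 } } };

	print_first_pn_byte(&seq);	/* ab */
	return 0;
}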
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 96557dd1e77d..3f4f9eaac140 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -18,7 +18,6 @@
 
 #define NUM_DEFAULT_KEYS 4
 #define NUM_DEFAULT_MGMT_KEYS 2
-#define MAX_PN_LEN 16
 
 struct ieee80211_local;
 struct ieee80211_sub_if_data;
@@ -78,7 +77,6 @@ struct ieee80211_key {
 			u32 mic_failures;
 		} tkip;
 		struct {
-			atomic64_t tx_pn;
 			/*
 			 * Last received packet number. The first
 			 * IEEE80211_NUM_TIDS counters are used with Data
@@ -90,21 +88,18 @@ struct ieee80211_key {
 			u32 replays; /* dot11RSNAStatsCCMPReplays */
 		} ccmp;
 		struct {
-			atomic64_t tx_pn;
 			u8 rx_pn[IEEE80211_CMAC_PN_LEN];
 			struct crypto_cipher *tfm;
 			u32 replays; /* dot11RSNAStatsCMACReplays */
 			u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
 		} aes_cmac;
 		struct {
-			atomic64_t tx_pn;
 			u8 rx_pn[IEEE80211_GMAC_PN_LEN];
 			struct crypto_aead *tfm;
 			u32 replays; /* dot11RSNAStatsCMACReplays */
 			u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
 		} aes_gmac;
 		struct {
-			atomic64_t tx_pn;
 			/* Last received packet number. The first
 			 * IEEE80211_NUM_TIDS counters are used with Data
 			 * frames and the last counter is used with Robust
@@ -116,7 +111,7 @@ struct ieee80211_key {
 		} gcmp;
 		struct {
 			/* generic cipher scheme */
-			u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN];
+			u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_MAX_PN_LEN];
 		} gen;
 	} u;
 
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index e2b836446af3..0505845b7ab8 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -12,96 +12,175 @@
 #include <linux/export.h>
 #include "led.h"
 
-#define MAC80211_BLINK_DELAY 50 /* ms */
-
-void ieee80211_led_rx(struct ieee80211_local *local)
-{
-	unsigned long led_delay = MAC80211_BLINK_DELAY;
-	if (unlikely(!local->rx_led))
-		return;
-	led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0);
-}
-
-void ieee80211_led_tx(struct ieee80211_local *local)
-{
-	unsigned long led_delay = MAC80211_BLINK_DELAY;
-	if (unlikely(!local->tx_led))
-		return;
-	led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0);
-}
-
 void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
 {
-	if (unlikely(!local->assoc_led))
+	if (!atomic_read(&local->assoc_led_active))
 		return;
 	if (associated)
-		led_trigger_event(local->assoc_led, LED_FULL);
+		led_trigger_event(&local->assoc_led, LED_FULL);
 	else
-		led_trigger_event(local->assoc_led, LED_OFF);
+		led_trigger_event(&local->assoc_led, LED_OFF);
 }
 
 void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
 {
-	if (unlikely(!local->radio_led))
+	if (!atomic_read(&local->radio_led_active))
 		return;
 	if (enabled)
-		led_trigger_event(local->radio_led, LED_FULL);
+		led_trigger_event(&local->radio_led, LED_FULL);
 	else
-		led_trigger_event(local->radio_led, LED_OFF);
+		led_trigger_event(&local->radio_led, LED_OFF);
+}
+
+void ieee80211_alloc_led_names(struct ieee80211_local *local)
+{
+	local->rx_led.name = kasprintf(GFP_KERNEL, "%srx",
+				       wiphy_name(local->hw.wiphy));
+	local->tx_led.name = kasprintf(GFP_KERNEL, "%stx",
+				       wiphy_name(local->hw.wiphy));
+	local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc",
+					  wiphy_name(local->hw.wiphy));
+	local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio",
+					  wiphy_name(local->hw.wiphy));
+}
+
+void ieee80211_free_led_names(struct ieee80211_local *local)
+{
+	kfree(local->rx_led.name);
+	kfree(local->tx_led.name);
+	kfree(local->assoc_led.name);
+	kfree(local->radio_led.name);
+}
+
+static void ieee80211_tx_led_activate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     tx_led);
+
+	atomic_inc(&local->tx_led_active);
+}
+
+static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     tx_led);
+
+	atomic_dec(&local->tx_led_active);
+}
+
+static void ieee80211_rx_led_activate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     rx_led);
+
+	atomic_inc(&local->rx_led_active);
+}
+
+static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     rx_led);
+
+	atomic_dec(&local->rx_led_active);
+}
+
+static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     assoc_led);
+
+	atomic_inc(&local->assoc_led_active);
+}
+
+static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     assoc_led);
+
+	atomic_dec(&local->assoc_led_active);
+}
+
+static void ieee80211_radio_led_activate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     radio_led);
+
+	atomic_inc(&local->radio_led_active);
+}
+
+static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     radio_led);
+
+	atomic_dec(&local->radio_led_active);
+}
+
+static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
+{
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     tpt_led);
+
+	atomic_inc(&local->tpt_led_active);
 }
 
-void ieee80211_led_names(struct ieee80211_local *local)
+static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev)
 {
-	snprintf(local->rx_led_name, sizeof(local->rx_led_name),
-		 "%srx", wiphy_name(local->hw.wiphy));
-	snprintf(local->tx_led_name, sizeof(local->tx_led_name),
-		 "%stx", wiphy_name(local->hw.wiphy));
-	snprintf(local->assoc_led_name, sizeof(local->assoc_led_name),
-		 "%sassoc", wiphy_name(local->hw.wiphy));
-	snprintf(local->radio_led_name, sizeof(local->radio_led_name),
-		 "%sradio", wiphy_name(local->hw.wiphy));
+	struct ieee80211_local *local = container_of(led_cdev->trigger,
+						     struct ieee80211_local,
+						     tpt_led);
+
+	atomic_dec(&local->tpt_led_active);
 }
 
 void ieee80211_led_init(struct ieee80211_local *local)
 {
-	local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
-	if (local->rx_led) {
-		local->rx_led->name = local->rx_led_name;
-		if (led_trigger_register(local->rx_led)) {
-			kfree(local->rx_led);
-			local->rx_led = NULL;
-		}
+	atomic_set(&local->rx_led_active, 0);
+	local->rx_led.activate = ieee80211_rx_led_activate;
+	local->rx_led.deactivate = ieee80211_rx_led_deactivate;
+	if (local->rx_led.name && led_trigger_register(&local->rx_led)) {
+		kfree(local->rx_led.name);
+		local->rx_led.name = NULL;
 	}
 
-	local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
-	if (local->tx_led) {
-		local->tx_led->name = local->tx_led_name;
-		if (led_trigger_register(local->tx_led)) {
-			kfree(local->tx_led);
-			local->tx_led = NULL;
-		}
+	atomic_set(&local->tx_led_active, 0);
+	local->tx_led.activate = ieee80211_tx_led_activate;
+	local->tx_led.deactivate = ieee80211_tx_led_deactivate;
+	if (local->tx_led.name && led_trigger_register(&local->tx_led)) {
+		kfree(local->tx_led.name);
+		local->tx_led.name = NULL;
 	}
 
-	local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
-	if (local->assoc_led) {
-		local->assoc_led->name = local->assoc_led_name;
-		if (led_trigger_register(local->assoc_led)) {
-			kfree(local->assoc_led);
-			local->assoc_led = NULL;
-		}
+	atomic_set(&local->assoc_led_active, 0);
+	local->assoc_led.activate = ieee80211_assoc_led_activate;
+	local->assoc_led.deactivate = ieee80211_assoc_led_deactivate;
+	if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) {
+		kfree(local->assoc_led.name);
+		local->assoc_led.name = NULL;
 	}
 
-	local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
-	if (local->radio_led) {
-		local->radio_led->name = local->radio_led_name;
-		if (led_trigger_register(local->radio_led)) {
-			kfree(local->radio_led);
-			local->radio_led = NULL;
-		}
+	atomic_set(&local->radio_led_active, 0);
+	local->radio_led.activate = ieee80211_radio_led_activate;
+	local->radio_led.deactivate = ieee80211_radio_led_deactivate;
+	if (local->radio_led.name && led_trigger_register(&local->radio_led)) {
+		kfree(local->radio_led.name);
+		local->radio_led.name = NULL;
 	}
 
+	atomic_set(&local->tpt_led_active, 0);
 	if (local->tpt_led_trigger) {
-		if (led_trigger_register(&local->tpt_led_trigger->trig)) {
+		local->tpt_led.activate = ieee80211_tpt_led_activate;
+		local->tpt_led.deactivate = ieee80211_tpt_led_deactivate;
+		if (led_trigger_register(&local->tpt_led)) {
 			kfree(local->tpt_led_trigger);
 			local->tpt_led_trigger = NULL;
 		}
@@ -110,58 +189,50 @@ void ieee80211_led_init(struct ieee80211_local *local)
 
 void ieee80211_led_exit(struct ieee80211_local *local)
 {
-	if (local->radio_led) {
-		led_trigger_unregister(local->radio_led);
-		kfree(local->radio_led);
-	}
-	if (local->assoc_led) {
-		led_trigger_unregister(local->assoc_led);
-		kfree(local->assoc_led);
-	}
-	if (local->tx_led) {
-		led_trigger_unregister(local->tx_led);
-		kfree(local->tx_led);
-	}
-	if (local->rx_led) {
-		led_trigger_unregister(local->rx_led);
-		kfree(local->rx_led);
-	}
+	if (local->radio_led.name)
+		led_trigger_unregister(&local->radio_led);
+	if (local->assoc_led.name)
+		led_trigger_unregister(&local->assoc_led);
+	if (local->tx_led.name)
+		led_trigger_unregister(&local->tx_led);
+	if (local->rx_led.name)
+		led_trigger_unregister(&local->rx_led);
 
 	if (local->tpt_led_trigger) {
-		led_trigger_unregister(&local->tpt_led_trigger->trig);
+		led_trigger_unregister(&local->tpt_led);
 		kfree(local->tpt_led_trigger);
 	}
 }
 
-char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	return local->radio_led_name;
+	return local->radio_led.name;
 }
 EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
 
-char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	return local->assoc_led_name;
+	return local->assoc_led.name;
 }
 EXPORT_SYMBOL(__ieee80211_get_assoc_led_name);
 
-char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	return local->tx_led_name;
+	return local->tx_led.name;
 }
 EXPORT_SYMBOL(__ieee80211_get_tx_led_name);
 
-char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	return local->rx_led_name;
+	return local->rx_led.name;
 }
 EXPORT_SYMBOL(__ieee80211_get_rx_led_name);
 
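The led.c rework above stops allocating the triggers separately: struct led_trigger members are embedded in ieee80211_local, registered only when a name was set, and given activate/deactivate hooks that keep a per-trigger atomic count of bound LEDs. A minimal user-space sketch of that counting idea follows; all names are local to the sketch, not mac80211 API.

#include <stdatomic.h>
#include <stdio.h>

/*
 * Model of the activate/deactivate pattern: the trigger counts
 * attached LEDs, so hot paths can skip all blink work with one
 * atomic read when nobody is listening.
 */
struct trigger {
	const char *name;
	atomic_int active;		/* models local->rx_led_active */
	void (*activate)(struct trigger *);
	void (*deactivate)(struct trigger *);
};

static void led_activate(struct trigger *t)	/* an LED attaches */
{
	atomic_fetch_add(&t->active, 1);
}

static void led_deactivate(struct trigger *t)	/* an LED detaches */
{
	atomic_fetch_sub(&t->active, 1);
}

static void rx_hot_path(struct trigger *t)
{
	if (!atomic_load(&t->active))
		return;			/* no LED bound: no blink work */
	printf("blink %s\n", t->name);
}

int main(void)
{
	struct trigger rx = { "phy0rx", 0, led_activate, led_deactivate };

	rx_hot_path(&rx);		/* skipped */
	rx.activate(&rx);
	rx_hot_path(&rx);		/* blinks */
	rx.deactivate(&rx);
	return 0;
}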
@@ -205,16 +276,17 @@ static void tpt_trig_timer(unsigned long data)
 		}
 	}
 
-	read_lock(&tpt_trig->trig.leddev_list_lock);
-	list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+	read_lock(&local->tpt_led.leddev_list_lock);
+	list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
 		led_blink_set(led_cdev, &on, &off);
-	read_unlock(&tpt_trig->trig.leddev_list_lock);
+	read_unlock(&local->tpt_led.leddev_list_lock);
 }
 
-char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
-					 unsigned int flags,
-					 const struct ieee80211_tpt_blink *blink_table,
-					 unsigned int blink_table_len)
+const char *
+__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
+				   unsigned int flags,
+				   const struct ieee80211_tpt_blink *blink_table,
+				   unsigned int blink_table_len)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct tpt_led_trigger *tpt_trig;
@@ -229,7 +301,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
 	snprintf(tpt_trig->name, sizeof(tpt_trig->name),
 		 "%stpt", wiphy_name(local->hw.wiphy));
 
-	tpt_trig->trig.name = tpt_trig->name;
+	local->tpt_led.name = tpt_trig->name;
 
 	tpt_trig->blink_table = blink_table;
 	tpt_trig->blink_table_len = blink_table_len;
@@ -269,10 +341,10 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
 	tpt_trig->running = false;
 	del_timer_sync(&tpt_trig->timer);
 
-	read_lock(&tpt_trig->trig.leddev_list_lock);
-	list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+	read_lock(&local->tpt_led.leddev_list_lock);
+	list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
 		led_set_brightness(led_cdev, LED_OFF);
-	read_unlock(&tpt_trig->trig.leddev_list_lock);
+	read_unlock(&local->tpt_led.leddev_list_lock);
 }
 
 void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
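Both tpt_trig_timer() and ieee80211_stop_tpt_led_trig() above walk the LEDs bound to the trigger under the LED core's leddev_list_lock read side. A standalone sketch of the same walk, with a pthread rwlock and a small array standing in for the lock and the led_cdevs list (assumed names, not kernel code):

#include <pthread.h>
#include <stdio.h>

struct led { const char *name; int brightness; };

struct trigger {
	pthread_rwlock_t leddev_list_lock;
	struct led *leds[4];
	int n_leds;
};

/* read-lock the attachment list, apply one op to every bound LED */
static void trigger_set_all(struct trigger *t, int brightness)
{
	pthread_rwlock_rdlock(&t->leddev_list_lock);
	for (int i = 0; i < t->n_leds; i++)
		t->leds[i]->brightness = brightness;
	pthread_rwlock_unlock(&t->leddev_list_lock);
}

int main(void)
{
	struct led a = { "wlan-led0", 0 }, b = { "wlan-led1", 0 };
	struct trigger t = { .n_leds = 2, .leds = { &a, &b } };

	pthread_rwlock_init(&t.leddev_list_lock, NULL);
	trigger_set_all(&t, 255);
	printf("%s=%d %s=%d\n", a.name, a.brightness, b.name, b.brightness);
	pthread_rwlock_destroy(&t.leddev_list_lock);
	return 0;
}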
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index 89f4344f13b9..a7893a1ac98b 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -11,25 +11,42 @@
 #include <linux/leds.h>
 #include "ieee80211_i.h"
 
+#define MAC80211_BLINK_DELAY	50 /* ms */
+
+static inline void ieee80211_led_rx(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_LEDS
+	unsigned long led_delay = MAC80211_BLINK_DELAY;
+
+	if (!atomic_read(&local->rx_led_active))
+		return;
+	led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0);
+#endif
+}
+
+static inline void ieee80211_led_tx(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_LEDS
+	unsigned long led_delay = MAC80211_BLINK_DELAY;
+
+	if (!atomic_read(&local->tx_led_active))
+		return;
+	led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0);
+#endif
+}
+
 #ifdef CONFIG_MAC80211_LEDS
-void ieee80211_led_rx(struct ieee80211_local *local);
-void ieee80211_led_tx(struct ieee80211_local *local);
 void ieee80211_led_assoc(struct ieee80211_local *local,
 			 bool associated);
 void ieee80211_led_radio(struct ieee80211_local *local,
 			 bool enabled);
-void ieee80211_led_names(struct ieee80211_local *local);
+void ieee80211_alloc_led_names(struct ieee80211_local *local);
+void ieee80211_free_led_names(struct ieee80211_local *local);
 void ieee80211_led_init(struct ieee80211_local *local);
 void ieee80211_led_exit(struct ieee80211_local *local);
 void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
 				unsigned int types_on, unsigned int types_off);
 #else
-static inline void ieee80211_led_rx(struct ieee80211_local *local)
-{
-}
-static inline void ieee80211_led_tx(struct ieee80211_local *local)
-{
-}
 static inline void ieee80211_led_assoc(struct ieee80211_local *local,
 				       bool associated)
 {
@@ -38,7 +55,10 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local,
 				       bool enabled)
 {
 }
-static inline void ieee80211_led_names(struct ieee80211_local *local)
+static inline void ieee80211_alloc_led_names(struct ieee80211_local *local)
+{
+}
+static inline void ieee80211_free_led_names(struct ieee80211_local *local)
 {
 }
 static inline void ieee80211_led_init(struct ieee80211_local *local)
@@ -58,7 +78,7 @@ static inline void
 ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
 {
 #ifdef CONFIG_MAC80211_LEDS
-	if (local->tpt_led_trigger && ieee80211_is_data(fc))
+	if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
 		local->tpt_led_trigger->tx_bytes += bytes;
 #endif
 }
@@ -67,7 +87,7 @@ static inline void
 ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
 {
 #ifdef CONFIG_MAC80211_LEDS
-	if (local->tpt_led_trigger && ieee80211_is_data(fc))
+	if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
 		local->tpt_led_trigger->rx_bytes += bytes;
 #endif
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index df3051d96aff..3c63468b4dfb 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -41,9 +41,6 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
41 unsigned int changed_flags; 41 unsigned int changed_flags;
42 unsigned int new_flags = 0; 42 unsigned int new_flags = 0;
43 43
44 if (atomic_read(&local->iff_promiscs))
45 new_flags |= FIF_PROMISC_IN_BSS;
46
47 if (atomic_read(&local->iff_allmultis)) 44 if (atomic_read(&local->iff_allmultis))
48 new_flags |= FIF_ALLMULTI; 45 new_flags |= FIF_ALLMULTI;
49 46
@@ -249,6 +246,7 @@ static void ieee80211_restart_work(struct work_struct *work)
249{ 246{
250 struct ieee80211_local *local = 247 struct ieee80211_local *local =
251 container_of(work, struct ieee80211_local, restart_work); 248 container_of(work, struct ieee80211_local, restart_work);
249 struct ieee80211_sub_if_data *sdata;
252 250
253 /* wait for scan work complete */ 251 /* wait for scan work complete */
254 flush_workqueue(local->workqueue); 252 flush_workqueue(local->workqueue);
@@ -257,6 +255,8 @@ static void ieee80211_restart_work(struct work_struct *work)
257 "%s called with hardware scan in progress\n", __func__); 255 "%s called with hardware scan in progress\n", __func__);
258 256
259 rtnl_lock(); 257 rtnl_lock();
258 list_for_each_entry(sdata, &local->interfaces, list)
259 flush_delayed_work(&sdata->dec_tailroom_needed_wk);
260 ieee80211_scan_cancel(local); 260 ieee80211_scan_cancel(local);
261 ieee80211_reconfig(local); 261 ieee80211_reconfig(local);
262 rtnl_unlock(); 262 rtnl_unlock();
@@ -646,7 +646,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
646 skb_queue_head_init(&local->skb_queue); 646 skb_queue_head_init(&local->skb_queue);
647 skb_queue_head_init(&local->skb_queue_unreliable); 647 skb_queue_head_init(&local->skb_queue_unreliable);
648 648
649 ieee80211_led_names(local); 649 ieee80211_alloc_led_names(local);
650 650
651 ieee80211_roc_setup(local); 651 ieee80211_roc_setup(local);
652 652
@@ -661,7 +661,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
661{ 661{
662 bool have_wep = !(IS_ERR(local->wep_tx_tfm) || 662 bool have_wep = !(IS_ERR(local->wep_tx_tfm) ||
663 IS_ERR(local->wep_rx_tfm)); 663 IS_ERR(local->wep_rx_tfm));
664 bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; 664 bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE);
665 int n_suites = 0, r = 0, w = 0; 665 int n_suites = 0, r = 0, w = 0;
666 u32 *suites; 666 u32 *suites;
667 static const u32 cipher_suites[] = { 667 static const u32 cipher_suites[] = {
@@ -681,7 +681,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
681 WLAN_CIPHER_SUITE_BIP_GMAC_256, 681 WLAN_CIPHER_SUITE_BIP_GMAC_256,
682 }; 682 };
683 683
684 if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || 684 if (ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) ||
685 local->hw.wiphy->cipher_suites) { 685 local->hw.wiphy->cipher_suites) {
686 /* If the driver advertises, or doesn't support SW crypto, 686 /* If the driver advertises, or doesn't support SW crypto,
687 * we only need to remove WEP if necessary. 687 * we only need to remove WEP if necessary.
@@ -771,8 +771,13 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
771 suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; 771 suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256;
772 } 772 }
773 773
774 for (r = 0; r < local->hw.n_cipher_schemes; r++) 774 for (r = 0; r < local->hw.n_cipher_schemes; r++) {
775 suites[w++] = cs[r].cipher; 775 suites[w++] = cs[r].cipher;
776 if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN)) {
777 kfree(suites);
778 return -EINVAL;
779 }
780 }
776 } 781 }
777 782
778 local->hw.wiphy->cipher_suites = suites; 783 local->hw.wiphy->cipher_suites = suites;
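The cipher-scheme loop above gains a pn_len bound check that frees the suites array and fails before the array is published in local->hw.wiphy->cipher_suites, so the error path leaks nothing. A standalone model of that copy-validate-free shape; the constant and all names are invented for the sketch:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PN_LEN 16	/* stands in for IEEE80211_MAX_PN_LEN */

struct cipher_scheme { uint32_t cipher; int pn_len; };

/* copy each entry, validating as we go; free on the first bad one */
static int build_suites(const struct cipher_scheme *cs, int n, uint32_t **out)
{
	uint32_t *suites = calloc(n, sizeof(*suites));
	int w = 0;

	if (!suites)
		return -ENOMEM;
	for (int r = 0; r < n; r++) {
		suites[w++] = cs[r].cipher;
		if (cs[r].pn_len > MAX_PN_LEN) {
			free(suites);	/* nothing else points at it yet */
			return -EINVAL;
		}
	}
	*out = suites;
	return w;
}

int main(void)
{
	struct cipher_scheme cs[] = { { 0x000FAC0A, 6 }, { 0x000FAC0B, 48 } };
	uint32_t *suites;
	int n = build_suites(cs, 2, &suites);

	printf("build_suites: %d\n", n);	/* -22: second entry invalid */
	if (n > 0)
		free(suites);
	return 0;
}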
@@ -792,7 +797,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
792 netdev_features_t feature_whitelist; 797 netdev_features_t feature_whitelist;
793 struct cfg80211_chan_def dflt_chandef = {}; 798 struct cfg80211_chan_def dflt_chandef = {};
794 799
795 if (hw->flags & IEEE80211_HW_QUEUE_CONTROL && 800 if (ieee80211_hw_check(hw, QUEUE_CONTROL) &&
796 (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE || 801 (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE ||
797 local->hw.offchannel_tx_hw_queue >= local->hw.queues)) 802 local->hw.offchannel_tx_hw_queue >= local->hw.queues))
798 return -EINVAL; 803 return -EINVAL;
@@ -840,7 +845,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
840 845
841 /* Only HW csum features are currently compatible with mac80211 */ 846 /* Only HW csum features are currently compatible with mac80211 */
842 feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 847 feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
843 NETIF_F_HW_CSUM; 848 NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA |
849 NETIF_F_GSO_SOFTWARE;
844 if (WARN_ON(hw->netdev_features & ~feature_whitelist)) 850 if (WARN_ON(hw->netdev_features & ~feature_whitelist))
845 return -EINVAL; 851 return -EINVAL;
846 852
@@ -939,9 +945,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
939 /* mac80211 supports control port protocol changing */ 945 /* mac80211 supports control port protocol changing */
940 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL; 946 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
941 947
942 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { 948 if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) {
943 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; 949 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
944 } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { 950 } else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) {
945 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; 951 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
946 if (hw->max_signal <= 0) { 952 if (hw->max_signal <= 0) {
947 result = -EINVAL; 953 result = -EINVAL;
@@ -995,7 +1001,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
995 local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; 1001 local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
996 1002
997 /* mac80211 supports eCSA, if the driver supports STA CSA at all */ 1003 /* mac80211 supports eCSA, if the driver supports STA CSA at all */
998 if (local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA) 1004 if (ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA))
999 local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING; 1005 local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING;
1000 1006
1001 local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; 1007 local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
@@ -1063,7 +1069,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1063 1069
1064 /* add one default STA interface if supported */ 1070 /* add one default STA interface if supported */
1065 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && 1071 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
1066 !(hw->flags & IEEE80211_HW_NO_AUTO_VIF)) { 1072 !ieee80211_hw_check(hw, NO_AUTO_VIF)) {
1067 result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL, 1073 result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL,
1068 NL80211_IFTYPE_STATION, NULL); 1074 NL80211_IFTYPE_STATION, NULL);
1069 if (result) 1075 if (result)
@@ -1209,6 +1215,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
1209 1215
1210 sta_info_stop(local); 1216 sta_info_stop(local);
1211 1217
1218 ieee80211_free_led_names(local);
1219
1212 wiphy_free(local->hw.wiphy); 1220 wiphy_free(local->hw.wiphy);
1213} 1221}
1214EXPORT_SYMBOL(ieee80211_free_hw); 1222EXPORT_SYMBOL(ieee80211_free_hw);
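Most of the churn in this file and the following ones is mechanical: open-coded local->hw.flags & IEEE80211_HW_FOO tests become ieee80211_hw_check(&local->hw, FOO), which lets the hardware capability flags grow past 32 bits by keeping them in a bitmap behind a helper. A standalone model of what such a check amounts to; the real mac80211 helper differs in detail, and everything here is illustrative:

#include <stdbool.h>
#include <stdio.h>

enum hw_flag {
	HW_SIGNAL_DBM,
	HW_PS_NULLFUNC_STACK,
	HW_REPORTS_TX_ACK_STATUS,
	NUM_HW_FLAGS,
};

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BITMAP_WORDS	((NUM_HW_FLAGS + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct hw { unsigned long flags[BITMAP_WORDS]; };

static void hw_set(struct hw *hw, enum hw_flag f)
{
	hw->flags[f / BITS_PER_LONG] |= 1UL << (f % BITS_PER_LONG);
}

/* models ieee80211_hw_check(hw, FLG) */
static bool hw_check(const struct hw *hw, enum hw_flag f)
{
	return hw->flags[f / BITS_PER_LONG] >> (f % BITS_PER_LONG) & 1;
}

int main(void)
{
	struct hw hw = { { 0 } };

	hw_set(&hw, HW_SIGNAL_DBM);
	printf("%d %d\n", hw_check(&hw, HW_SIGNAL_DBM),
	       hw_check(&hw, HW_PS_NULLFUNC_STACK));	/* 1 0 */
	return 0;
}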
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d4684242e78b..817098add1d6 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -680,6 +680,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
680 *pos++ = 0x0; 680 *pos++ = 0x0;
681 *pos++ = ieee80211_frequency_to_channel( 681 *pos++ = ieee80211_frequency_to_channel(
682 csa->settings.chandef.chan->center_freq); 682 csa->settings.chandef.chan->center_freq);
683 bcn->csa_current_counter = csa->settings.count;
683 bcn->csa_counter_offsets[0] = hdr_len + 6; 684 bcn->csa_counter_offsets[0] = hdr_len + 6;
684 *pos++ = csa->settings.count; 685 *pos++ = csa->settings.count;
685 *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; 686 *pos++ = WLAN_EID_CHAN_SWITCH_PARAM;
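The one-line mesh fix keeps the beacon's csa_current_counter in sync with the count written into the template at csa_counter_offsets[0]; if the two start from different values, the countdown sent over the air disagrees with the state used to decide when to switch. A toy model of that invariant, with an assumed layout rather than the real beacon format:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct beacon {
	uint8_t data[64];
	int csa_counter_offset;
	uint8_t csa_current_counter;
};

static void beacon_build(struct beacon *b, uint8_t count)
{
	memset(b->data, 0, sizeof(b->data));
	b->csa_counter_offset = 10;	/* hdr_len + 6 in the real code */
	b->data[b->csa_counter_offset] = count;
	b->csa_current_counter = count;	/* the line the fix adds */
}

static void beacon_tx(struct beacon *b)
{
	/* each transmission patches the fresh countdown into the frame */
	b->data[b->csa_counter_offset] = b->csa_current_counter--;
}

int main(void)
{
	struct beacon b;

	beacon_build(&b, 5);
	beacon_tx(&b);
	beacon_tx(&b);
	printf("on air: %u, remaining: %u\n",
	       b.data[b.csa_counter_offset], b.csa_current_counter);
	return 0;
}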
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 214e63b84e5c..085edc1d056b 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -510,14 +510,14 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
510 510
511static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, 511static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
512 struct ieee80211_mgmt *mgmt, 512 struct ieee80211_mgmt *mgmt,
513 const u8 *preq_elem, u32 metric) 513 const u8 *preq_elem, u32 orig_metric)
514{ 514{
515 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 515 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
516 struct mesh_path *mpath = NULL; 516 struct mesh_path *mpath = NULL;
517 const u8 *target_addr, *orig_addr; 517 const u8 *target_addr, *orig_addr;
518 const u8 *da; 518 const u8 *da;
519 u8 target_flags, ttl, flags; 519 u8 target_flags, ttl, flags;
520 u32 orig_sn, target_sn, lifetime, orig_metric; 520 u32 orig_sn, target_sn, lifetime, target_metric;
521 bool reply = false; 521 bool reply = false;
522 bool forward = true; 522 bool forward = true;
523 bool root_is_gate; 523 bool root_is_gate;
@@ -528,7 +528,6 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
528 target_sn = PREQ_IE_TARGET_SN(preq_elem); 528 target_sn = PREQ_IE_TARGET_SN(preq_elem);
529 orig_sn = PREQ_IE_ORIG_SN(preq_elem); 529 orig_sn = PREQ_IE_ORIG_SN(preq_elem);
530 target_flags = PREQ_IE_TARGET_F(preq_elem); 530 target_flags = PREQ_IE_TARGET_F(preq_elem);
531 orig_metric = metric;
532 /* Proactive PREQ gate announcements */ 531 /* Proactive PREQ gate announcements */
533 flags = PREQ_IE_FLAGS(preq_elem); 532 flags = PREQ_IE_FLAGS(preq_elem);
534 root_is_gate = !!(flags & RANN_FLAG_IS_GATE); 533 root_is_gate = !!(flags & RANN_FLAG_IS_GATE);
@@ -539,7 +538,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
539 mhwmp_dbg(sdata, "PREQ is for us\n"); 538 mhwmp_dbg(sdata, "PREQ is for us\n");
540 forward = false; 539 forward = false;
541 reply = true; 540 reply = true;
542 metric = 0; 541 target_metric = 0;
543 if (time_after(jiffies, ifmsh->last_sn_update + 542 if (time_after(jiffies, ifmsh->last_sn_update +
544 net_traversal_jiffies(sdata)) || 543 net_traversal_jiffies(sdata)) ||
545 time_before(jiffies, ifmsh->last_sn_update)) { 544 time_before(jiffies, ifmsh->last_sn_update)) {
@@ -556,7 +555,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
556 reply = true; 555 reply = true;
557 target_addr = sdata->vif.addr; 556 target_addr = sdata->vif.addr;
558 target_sn = ++ifmsh->sn; 557 target_sn = ++ifmsh->sn;
559 metric = 0; 558 target_metric = 0;
560 ifmsh->last_sn_update = jiffies; 559 ifmsh->last_sn_update = jiffies;
561 } 560 }
562 if (root_is_gate) 561 if (root_is_gate)
@@ -574,7 +573,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
574 } else if ((!(target_flags & MP_F_DO)) && 573 } else if ((!(target_flags & MP_F_DO)) &&
575 (mpath->flags & MESH_PATH_ACTIVE)) { 574 (mpath->flags & MESH_PATH_ACTIVE)) {
576 reply = true; 575 reply = true;
577 metric = mpath->metric; 576 target_metric = mpath->metric;
578 target_sn = mpath->sn; 577 target_sn = mpath->sn;
579 if (target_flags & MP_F_RF) 578 if (target_flags & MP_F_RF)
580 target_flags |= MP_F_DO; 579 target_flags |= MP_F_DO;
@@ -593,7 +592,8 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
593 mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr, 592 mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr,
594 orig_sn, 0, target_addr, 593 orig_sn, 0, target_addr,
595 target_sn, mgmt->sa, 0, ttl, 594 target_sn, mgmt->sa, 0, ttl,
596 lifetime, metric, 0, sdata); 595 lifetime, target_metric, 0,
596 sdata);
597 } else { 597 } else {
598 ifmsh->mshstats.dropped_frames_ttl++; 598 ifmsh->mshstats.dropped_frames_ttl++;
599 } 599 }
@@ -619,13 +619,12 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
619 if (flags & IEEE80211_PREQ_PROACTIVE_PREP_FLAG) { 619 if (flags & IEEE80211_PREQ_PROACTIVE_PREP_FLAG) {
620 target_addr = PREQ_IE_TARGET_ADDR(preq_elem); 620 target_addr = PREQ_IE_TARGET_ADDR(preq_elem);
621 target_sn = PREQ_IE_TARGET_SN(preq_elem); 621 target_sn = PREQ_IE_TARGET_SN(preq_elem);
622 metric = orig_metric;
623 } 622 }
624 623
625 mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, 624 mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr,
626 orig_sn, target_flags, target_addr, 625 orig_sn, target_flags, target_addr,
627 target_sn, da, hopcount, ttl, lifetime, 626 target_sn, da, hopcount, ttl, lifetime,
628 metric, preq_id, sdata); 627 orig_metric, preq_id, sdata);
629 if (!is_multicast_ether_addr(da)) 628 if (!is_multicast_ether_addr(da))
630 ifmsh->mshstats.fwded_unicast++; 629 ifmsh->mshstats.fwded_unicast++;
631 else 630 else
@@ -854,7 +853,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
854{ 853{
855 struct ieee802_11_elems elems; 854 struct ieee802_11_elems elems;
856 size_t baselen; 855 size_t baselen;
857 u32 last_hop_metric; 856 u32 path_metric;
858 struct sta_info *sta; 857 struct sta_info *sta;
859 858
860 /* need action_code */ 859 /* need action_code */
@@ -877,21 +876,21 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
877 if (elems.preq_len != 37) 876 if (elems.preq_len != 37)
878 /* Right now we support just 1 destination and no AE */ 877 /* Right now we support just 1 destination and no AE */
879 return; 878 return;
880 last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq, 879 path_metric = hwmp_route_info_get(sdata, mgmt, elems.preq,
881 MPATH_PREQ); 880 MPATH_PREQ);
882 if (last_hop_metric) 881 if (path_metric)
883 hwmp_preq_frame_process(sdata, mgmt, elems.preq, 882 hwmp_preq_frame_process(sdata, mgmt, elems.preq,
884 last_hop_metric); 883 path_metric);
885 } 884 }
886 if (elems.prep) { 885 if (elems.prep) {
887 if (elems.prep_len != 31) 886 if (elems.prep_len != 31)
888 /* Right now we support no AE */ 887 /* Right now we support no AE */
889 return; 888 return;
890 last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep, 889 path_metric = hwmp_route_info_get(sdata, mgmt, elems.prep,
891 MPATH_PREP); 890 MPATH_PREP);
892 if (last_hop_metric) 891 if (path_metric)
893 hwmp_prep_frame_process(sdata, mgmt, elems.prep, 892 hwmp_prep_frame_process(sdata, mgmt, elems.prep,
894 last_hop_metric); 893 path_metric);
895 } 894 }
896 if (elems.perr) { 895 if (elems.perr) {
897 if (elems.perr_len != 15) 896 if (elems.perr_len != 15)
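The hwmp changes split the single metric variable into orig_metric (carried forward in the PREQ) and target_metric (sent back in a PREP), so building a reply can no longer clobber the value that must still be forwarded. A compact model of the two roles, with illustrative names only:

#include <stdint.h>
#include <stdio.h>

struct preq { uint32_t orig_metric; };

static void process_preq(const struct preq *preq, int reply_for_us,
			 uint32_t *fwd_metric, uint32_t *reply_metric)
{
	uint32_t orig_metric = preq->orig_metric;	/* toward originator */
	uint32_t target_metric = 0;			/* goes into a PREP */

	if (reply_for_us)
		target_metric = 0;	/* distance from the target to itself */

	*reply_metric = target_metric;
	*fwd_metric = orig_metric;	/* forwarded PREQ keeps its metric */
}

int main(void)
{
	struct preq preq = { .orig_metric = 120 };
	uint32_t fwd, rep;

	process_preq(&preq, 1, &fwd, &rep);
	printf("forwarded=%u reply=%u\n", fwd, rep);	/* 120 0 */
	return 0;
}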
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 60d737f144e3..5438d13e2f00 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -72,10 +72,11 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
72 * 72 *
73 * @sta: mesh peer link to restart 73 * @sta: mesh peer link to restart
74 * 74 *
75 * Locking: this function must be called holding sta->lock 75 * Locking: this function must be called holding sta->plink_lock
76 */ 76 */
77static inline void mesh_plink_fsm_restart(struct sta_info *sta) 77static inline void mesh_plink_fsm_restart(struct sta_info *sta)
78{ 78{
79 lockdep_assert_held(&sta->plink_lock);
79 sta->plink_state = NL80211_PLINK_LISTEN; 80 sta->plink_state = NL80211_PLINK_LISTEN;
80 sta->llid = sta->plid = sta->reason = 0; 81 sta->llid = sta->plid = sta->reason = 0;
81 sta->plink_retries = 0; 82 sta->plink_retries = 0;
@@ -105,9 +106,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
105 /* (IEEE 802.11-2012 19.4.5) */ 106 /* (IEEE 802.11-2012 19.4.5) */
106 short_slot = true; 107 short_slot = true;
107 goto out; 108 goto out;
108 } else if (band != IEEE80211_BAND_2GHZ || 109 } else if (band != IEEE80211_BAND_2GHZ)
109 (band == IEEE80211_BAND_2GHZ &&
110 local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
111 goto out; 110 goto out;
112 111
113 for (i = 0; i < sband->n_bitrates; i++) 112 for (i = 0; i < sband->n_bitrates; i++)
@@ -213,13 +212,15 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
213 * All mesh paths with this peer as next hop will be flushed 212 * All mesh paths with this peer as next hop will be flushed
214 * Returns beacon changed flag if the beacon content changed. 213 * Returns beacon changed flag if the beacon content changed.
215 * 214 *
216 * Locking: the caller must hold sta->lock 215 * Locking: the caller must hold sta->plink_lock
217 */ 216 */
218static u32 __mesh_plink_deactivate(struct sta_info *sta) 217static u32 __mesh_plink_deactivate(struct sta_info *sta)
219{ 218{
220 struct ieee80211_sub_if_data *sdata = sta->sdata; 219 struct ieee80211_sub_if_data *sdata = sta->sdata;
221 u32 changed = 0; 220 u32 changed = 0;
222 221
222 lockdep_assert_held(&sta->plink_lock);
223
223 if (sta->plink_state == NL80211_PLINK_ESTAB) 224 if (sta->plink_state == NL80211_PLINK_ESTAB)
224 changed = mesh_plink_dec_estab_count(sdata); 225 changed = mesh_plink_dec_estab_count(sdata);
225 sta->plink_state = NL80211_PLINK_BLOCKED; 226 sta->plink_state = NL80211_PLINK_BLOCKED;
@@ -244,13 +245,13 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
244 struct ieee80211_sub_if_data *sdata = sta->sdata; 245 struct ieee80211_sub_if_data *sdata = sta->sdata;
245 u32 changed; 246 u32 changed;
246 247
247 spin_lock_bh(&sta->lock); 248 spin_lock_bh(&sta->plink_lock);
248 changed = __mesh_plink_deactivate(sta); 249 changed = __mesh_plink_deactivate(sta);
249 sta->reason = WLAN_REASON_MESH_PEER_CANCELED; 250 sta->reason = WLAN_REASON_MESH_PEER_CANCELED;
250 mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, 251 mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
251 sta->sta.addr, sta->llid, sta->plid, 252 sta->sta.addr, sta->llid, sta->plid,
252 sta->reason); 253 sta->reason);
253 spin_unlock_bh(&sta->lock); 254 spin_unlock_bh(&sta->plink_lock);
254 255
255 return changed; 256 return changed;
256} 257}
@@ -387,12 +388,13 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
387 sband = local->hw.wiphy->bands[band]; 388 sband = local->hw.wiphy->bands[band];
388 rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); 389 rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
389 390
390 spin_lock_bh(&sta->lock); 391 spin_lock_bh(&sta->plink_lock);
391 sta->last_rx = jiffies; 392 sta->last_rx = jiffies;
392 393
393 /* rates and capabilities don't change during peering */ 394 /* rates and capabilities don't change during peering */
394 if (sta->plink_state == NL80211_PLINK_ESTAB) 395 if (sta->plink_state == NL80211_PLINK_ESTAB && sta->processed_beacon)
395 goto out; 396 goto out;
397 sta->processed_beacon = true;
396 398
397 if (sta->sta.supp_rates[band] != rates) 399 if (sta->sta.supp_rates[band] != rates)
398 changed |= IEEE80211_RC_SUPP_RATES_CHANGED; 400 changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
@@ -419,7 +421,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
419 else 421 else
420 rate_control_rate_update(local, sband, sta, changed); 422 rate_control_rate_update(local, sband, sta, changed);
421out: 423out:
422 spin_unlock_bh(&sta->lock); 424 spin_unlock_bh(&sta->plink_lock);
423} 425}
424 426
425static struct sta_info * 427static struct sta_info *
@@ -552,7 +554,7 @@ static void mesh_plink_timer(unsigned long data)
552 if (sta->sdata->local->quiescing) 554 if (sta->sdata->local->quiescing)
553 return; 555 return;
554 556
555 spin_lock_bh(&sta->lock); 557 spin_lock_bh(&sta->plink_lock);
556 558
557 /* If a timer fires just before a state transition on another CPU, 559 /* If a timer fires just before a state transition on another CPU,
558 * we may have already extended the timeout and changed state by the 560 * we may have already extended the timeout and changed state by the
@@ -563,7 +565,7 @@ static void mesh_plink_timer(unsigned long data)
563 mpl_dbg(sta->sdata, 565 mpl_dbg(sta->sdata,
564 "Ignoring timer for %pM in state %s (timer adjusted)", 566 "Ignoring timer for %pM in state %s (timer adjusted)",
565 sta->sta.addr, mplstates[sta->plink_state]); 567 sta->sta.addr, mplstates[sta->plink_state]);
566 spin_unlock_bh(&sta->lock); 568 spin_unlock_bh(&sta->plink_lock);
567 return; 569 return;
568 } 570 }
569 571
@@ -573,7 +575,7 @@ static void mesh_plink_timer(unsigned long data)
573 mpl_dbg(sta->sdata, 575 mpl_dbg(sta->sdata,
574 "Ignoring timer for %pM in state %s (timer deleted)", 576 "Ignoring timer for %pM in state %s (timer deleted)",
575 sta->sta.addr, mplstates[sta->plink_state]); 577 sta->sta.addr, mplstates[sta->plink_state]);
576 spin_unlock_bh(&sta->lock); 578 spin_unlock_bh(&sta->plink_lock);
577 return; 579 return;
578 } 580 }
579 581
@@ -619,7 +621,7 @@ static void mesh_plink_timer(unsigned long data)
619 default: 621 default:
620 break; 622 break;
621 } 623 }
622 spin_unlock_bh(&sta->lock); 624 spin_unlock_bh(&sta->plink_lock);
623 if (action) 625 if (action)
624 mesh_plink_frame_tx(sdata, action, sta->sta.addr, 626 mesh_plink_frame_tx(sdata, action, sta->sta.addr,
625 sta->llid, sta->plid, reason); 627 sta->llid, sta->plid, reason);
@@ -674,16 +676,16 @@ u32 mesh_plink_open(struct sta_info *sta)
674 if (!test_sta_flag(sta, WLAN_STA_AUTH)) 676 if (!test_sta_flag(sta, WLAN_STA_AUTH))
675 return 0; 677 return 0;
676 678
677 spin_lock_bh(&sta->lock); 679 spin_lock_bh(&sta->plink_lock);
678 sta->llid = mesh_get_new_llid(sdata); 680 sta->llid = mesh_get_new_llid(sdata);
679 if (sta->plink_state != NL80211_PLINK_LISTEN && 681 if (sta->plink_state != NL80211_PLINK_LISTEN &&
680 sta->plink_state != NL80211_PLINK_BLOCKED) { 682 sta->plink_state != NL80211_PLINK_BLOCKED) {
681 spin_unlock_bh(&sta->lock); 683 spin_unlock_bh(&sta->plink_lock);
682 return 0; 684 return 0;
683 } 685 }
684 sta->plink_state = NL80211_PLINK_OPN_SNT; 686 sta->plink_state = NL80211_PLINK_OPN_SNT;
685 mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout); 687 mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout);
686 spin_unlock_bh(&sta->lock); 688 spin_unlock_bh(&sta->plink_lock);
687 mpl_dbg(sdata, 689 mpl_dbg(sdata,
688 "Mesh plink: starting establishment with %pM\n", 690 "Mesh plink: starting establishment with %pM\n",
689 sta->sta.addr); 691 sta->sta.addr);
@@ -700,10 +702,10 @@ u32 mesh_plink_block(struct sta_info *sta)
700{ 702{
701 u32 changed; 703 u32 changed;
702 704
703 spin_lock_bh(&sta->lock); 705 spin_lock_bh(&sta->plink_lock);
704 changed = __mesh_plink_deactivate(sta); 706 changed = __mesh_plink_deactivate(sta);
705 sta->plink_state = NL80211_PLINK_BLOCKED; 707 sta->plink_state = NL80211_PLINK_BLOCKED;
706 spin_unlock_bh(&sta->lock); 708 spin_unlock_bh(&sta->plink_lock);
707 709
708 return changed; 710 return changed;
709} 711}
@@ -758,7 +760,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
758 mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, 760 mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr,
759 mplstates[sta->plink_state], mplevents[event]); 761 mplstates[sta->plink_state], mplevents[event]);
760 762
761 spin_lock_bh(&sta->lock); 763 spin_lock_bh(&sta->plink_lock);
762 switch (sta->plink_state) { 764 switch (sta->plink_state) {
763 case NL80211_PLINK_LISTEN: 765 case NL80211_PLINK_LISTEN:
764 switch (event) { 766 switch (event) {
@@ -872,7 +874,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
872 */ 874 */
873 break; 875 break;
874 } 876 }
875 spin_unlock_bh(&sta->lock); 877 spin_unlock_bh(&sta->plink_lock);
876 if (action) { 878 if (action) {
877 mesh_plink_frame_tx(sdata, action, sta->sta.addr, 879 mesh_plink_frame_tx(sdata, action, sta->sta.addr,
878 sta->llid, sta->plid, sta->reason); 880 sta->llid, sta->plid, sta->reason);
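mesh_plink.c moves peering state under a dedicated sta->plink_lock and backs the "caller must hold the lock" comments with lockdep_assert_held(). A user-space model of turning such a locking comment into an executable assertion, with pthreads standing in for the kernel primitives:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct peer {
	pthread_mutex_t plink_lock;
	pthread_t owner;		/* valid only while locked */
	int locked;
	int state;
};

static void peer_lock(struct peer *p)
{
	pthread_mutex_lock(&p->plink_lock);
	p->owner = pthread_self();
	p->locked = 1;
}

static void peer_unlock(struct peer *p)
{
	p->locked = 0;
	pthread_mutex_unlock(&p->plink_lock);
}

/* stands in for lockdep_assert_held(&sta->plink_lock) */
static void assert_plink_held(struct peer *p)
{
	assert(p->locked && pthread_equal(p->owner, pthread_self()));
}

static void __peer_restart(struct peer *p)	/* caller holds plink_lock */
{
	assert_plink_held(p);
	p->state = 0;
}

int main(void)
{
	struct peer p = { .plink_lock = PTHREAD_MUTEX_INITIALIZER };

	peer_lock(&p);
	__peer_restart(&p);		/* ok: lock held */
	peer_unlock(&p);
	printf("state=%d\n", p.state);
	return 0;
}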
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 26053bf2faa8..9b2cc278ac2a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -118,7 +118,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
118 if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) 118 if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
119 return; 119 return;
120 120
121 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 121 if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
122 return; 122 return;
123 123
124 mod_timer(&sdata->u.mgd.bcn_mon_timer, 124 mod_timer(&sdata->u.mgd.bcn_mon_timer,
@@ -134,7 +134,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
134 134
135 ifmgd->probe_send_count = 0; 135 ifmgd->probe_send_count = 0;
136 136
137 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 137 if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
138 return; 138 return;
139 139
140 mod_timer(&sdata->u.mgd.conn_mon_timer, 140 mod_timer(&sdata->u.mgd.conn_mon_timer,
@@ -669,17 +669,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
669 capab = WLAN_CAPABILITY_ESS; 669 capab = WLAN_CAPABILITY_ESS;
670 670
671 if (sband->band == IEEE80211_BAND_2GHZ) { 671 if (sband->band == IEEE80211_BAND_2GHZ) {
672 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) 672 capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
673 capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; 673 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
674 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
675 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
676 } 674 }
677 675
678 if (assoc_data->capability & WLAN_CAPABILITY_PRIVACY) 676 if (assoc_data->capability & WLAN_CAPABILITY_PRIVACY)
679 capab |= WLAN_CAPABILITY_PRIVACY; 677 capab |= WLAN_CAPABILITY_PRIVACY;
680 678
681 if ((assoc_data->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && 679 if ((assoc_data->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
682 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) 680 ieee80211_hw_check(&local->hw, SPECTRUM_MGMT))
683 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; 681 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
684 682
685 if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM) 683 if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM)
@@ -887,7 +885,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
887 drv_mgd_prepare_tx(local, sdata); 885 drv_mgd_prepare_tx(local, sdata);
888 886
889 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 887 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
890 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 888 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
891 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | 889 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
892 IEEE80211_TX_INTFL_MLME_CONN_TX; 890 IEEE80211_TX_INTFL_MLME_CONN_TX;
893 ieee80211_tx_skb(sdata, skb); 891 ieee80211_tx_skb(sdata, skb);
@@ -929,7 +927,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
929 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | 927 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
930 IEEE80211_TX_INTFL_OFFCHAN_TX_OK; 928 IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
931 929
932 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 930 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
933 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; 931 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
934 932
935 if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) 933 if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)
@@ -1098,6 +1096,24 @@ static void ieee80211_chswitch_timer(unsigned long data)
1098 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); 1096 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
1099} 1097}
1100 1098
1099static void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
1100{
1101 struct sta_info *sta;
1102 u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED;
1103
1104 rcu_read_lock();
1105 list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
1106 if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
1107 !test_sta_flag(sta, WLAN_STA_AUTHORIZED))
1108 continue;
1109
1110 ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr,
1111 NL80211_TDLS_TEARDOWN, reason,
1112 GFP_ATOMIC);
1113 }
1114 rcu_read_unlock();
1115}
1116
1101static void 1117static void
1102ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, 1118ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1103 u64 timestamp, u32 device_timestamp, 1119 u64 timestamp, u32 device_timestamp,
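The new ieee80211_teardown_tdls_peers() scans the station list under rcu_read_lock() and only requests teardown with GFP_ATOMIC, since the RCU read side must not sleep. A standalone sketch of that shape, with a rwlock playing the role of the RCU read lock and all names invented here:

#include <pthread.h>
#include <stdio.h>

struct sta { int is_tdls; int authorized; int torn_down; };

static pthread_rwlock_t sta_list_lock = PTHREAD_RWLOCK_INITIALIZER;

static void teardown_tdls_peers(struct sta *list, int n)
{
	pthread_rwlock_rdlock(&sta_list_lock);	/* rcu_read_lock() */
	for (int i = 0; i < n; i++) {
		if (!list[i].is_tdls || !list[i].authorized)
			continue;		/* skip non-matching peers */
		list[i].torn_down = 1;	/* request only; no sleeping here */
	}
	pthread_rwlock_unlock(&sta_list_lock);	/* rcu_read_unlock() */
}

int main(void)
{
	struct sta stas[3] = { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 } };

	teardown_tdls_peers(stas, 3);
	printf("%d %d %d\n", stas[0].torn_down, stas[1].torn_down,
	       stas[2].torn_down);		/* 1 0 0 */
	return 0;
}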
@@ -1161,6 +1177,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1161 return; 1177 return;
1162 } 1178 }
1163 1179
1180 /*
1181 * Drop all TDLS peers - either we disconnect or move to a different
1182 * channel from this point on. There's no telling what our peer will do.
1183 * The TDLS WIDER_BW scenario is also problematic, as peers might now
1184 * have an incompatible wider chandef.
1185 */
1186 ieee80211_teardown_tdls_peers(sdata);
1187
1164 mutex_lock(&local->mtx); 1188 mutex_lock(&local->mtx);
1165 mutex_lock(&local->chanctx_mtx); 1189 mutex_lock(&local->chanctx_mtx);
1166 conf = rcu_dereference_protected(sdata->vif.chanctx_conf, 1190 conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
@@ -1174,7 +1198,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1174 chanctx = container_of(conf, struct ieee80211_chanctx, conf); 1198 chanctx = container_of(conf, struct ieee80211_chanctx, conf);
1175 1199
1176 if (local->use_chanctx && 1200 if (local->use_chanctx &&
1177 !(local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)) { 1201 !ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
1178 sdata_info(sdata, 1202 sdata_info(sdata,
1179 "driver doesn't support chan-switch with channel contexts\n"); 1203 "driver doesn't support chan-switch with channel contexts\n");
1180 goto drop_connection; 1204 goto drop_connection;
@@ -1383,15 +1407,15 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
1383 return; 1407 return;
1384 1408
1385 if (conf->dynamic_ps_timeout > 0 && 1409 if (conf->dynamic_ps_timeout > 0 &&
1386 !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)) { 1410 !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) {
1387 mod_timer(&local->dynamic_ps_timer, jiffies + 1411 mod_timer(&local->dynamic_ps_timer, jiffies +
1388 msecs_to_jiffies(conf->dynamic_ps_timeout)); 1412 msecs_to_jiffies(conf->dynamic_ps_timeout));
1389 } else { 1413 } else {
1390 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) 1414 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
1391 ieee80211_send_nullfunc(local, sdata, 1); 1415 ieee80211_send_nullfunc(local, sdata, 1);
1392 1416
1393 if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && 1417 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
1394 (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) 1418 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
1395 return; 1419 return;
1396 1420
1397 conf->flags |= IEEE80211_CONF_PS; 1421 conf->flags |= IEEE80211_CONF_PS;
@@ -1450,7 +1474,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
1450 int count = 0; 1474 int count = 0;
1451 int timeout; 1475 int timeout;
1452 1476
1453 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) { 1477 if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) {
1454 local->ps_sdata = NULL; 1478 local->ps_sdata = NULL;
1455 return; 1479 return;
1456 } 1480 }
@@ -1596,7 +1620,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
1596 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 1620 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
1597 } 1621 }
1598 1622
1599 if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && 1623 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
1600 !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { 1624 !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
1601 if (drv_tx_frames_pending(local)) { 1625 if (drv_tx_frames_pending(local)) {
1602 mod_timer(&local->dynamic_ps_timer, jiffies + 1626 mod_timer(&local->dynamic_ps_timer, jiffies +
@@ -1609,8 +1633,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
1609 } 1633 }
1610 } 1634 }
1611 1635
1612 if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && 1636 if (!(ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
1613 (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || 1637 ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) ||
1614 (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { 1638 (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
1615 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; 1639 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
1616 local->hw.conf.flags |= IEEE80211_CONF_PS; 1640 local->hw.conf.flags |= IEEE80211_CONF_PS;
@@ -2135,7 +2159,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
2135 ieee80211_recalc_ps(local, -1); 2159 ieee80211_recalc_ps(local, -1);
2136 mutex_unlock(&local->iflist_mtx); 2160 mutex_unlock(&local->iflist_mtx);
2137 2161
2138 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 2162 if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
2139 goto out; 2163 goto out;
2140 2164
2141 /* 2165 /*
@@ -2233,7 +2257,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
2233 */ 2257 */
2234 ifmgd->probe_send_count++; 2258 ifmgd->probe_send_count++;
2235 2259
2236 if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { 2260 if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
2237 ifmgd->nullfunc_failed = false; 2261 ifmgd->nullfunc_failed = false;
2238 ieee80211_send_nullfunc(sdata->local, sdata, 0); 2262 ieee80211_send_nullfunc(sdata->local, sdata, 0);
2239 } else { 2263 } else {
@@ -2495,6 +2519,34 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
2495 sdata->u.mgd.auth_data = NULL; 2519 sdata->u.mgd.auth_data = NULL;
2496} 2520}
2497 2521
2522static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
2523 bool assoc)
2524{
2525 struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
2526
2527 sdata_assert_lock(sdata);
2528
2529 if (!assoc) {
2530 /*
2531 * we are not associated yet, the only timer that could be
2532 * running is the timeout for the association response,
2533 * which is not relevant anymore.
2534 */
2535 del_timer_sync(&sdata->u.mgd.timer);
2536 sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
2537
2538 eth_zero_addr(sdata->u.mgd.bssid);
2539 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
2540 sdata->u.mgd.flags = 0;
2541 mutex_lock(&sdata->local->mtx);
2542 ieee80211_vif_release_channel(sdata);
2543 mutex_unlock(&sdata->local->mtx);
2544 }
2545
2546 kfree(assoc_data);
2547 sdata->u.mgd.assoc_data = NULL;
2548}
2549
2498static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, 2550static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
2499 struct ieee80211_mgmt *mgmt, size_t len) 2551 struct ieee80211_mgmt *mgmt, size_t len)
2500{ 2552{
@@ -2510,7 +2562,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
2510 return; 2562 return;
2511 auth_data->expected_transaction = 4; 2563 auth_data->expected_transaction = 4;
2512 drv_mgd_prepare_tx(sdata->local, sdata); 2564 drv_mgd_prepare_tx(sdata->local, sdata);
2513 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 2565 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
2514 tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | 2566 tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
2515 IEEE80211_TX_INTFL_MLME_CONN_TX; 2567 IEEE80211_TX_INTFL_MLME_CONN_TX;
2516 ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, 2568 ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0,
@@ -2687,28 +2739,42 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	const u8 *bssid = NULL;
-	u16 reason_code;
+	u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
 
 	sdata_assert_lock(sdata);
 
 	if (len < 24 + 2)
 		return;
 
-	if (!ifmgd->associated ||
-	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
-		return;
+	if (ifmgd->associated &&
+	    ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) {
+		const u8 *bssid = ifmgd->associated->bssid;
 
-	bssid = ifmgd->associated->bssid;
+		sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
+			   bssid, reason_code,
+			   ieee80211_get_reason_code_string(reason_code));
 
-	reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
+		ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
 
-	sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
-		   bssid, reason_code, ieee80211_get_reason_code_string(reason_code));
+		ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false,
+					    reason_code);
+		return;
+	}
 
-	ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
+	if (ifmgd->assoc_data &&
+	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
+		const u8 *bssid = ifmgd->assoc_data->bss->bssid;
 
-	ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code);
+		sdata_info(sdata,
+			   "deauthenticated from %pM while associating (Reason: %u=%s)\n",
+			   bssid, reason_code,
+			   ieee80211_get_reason_code_string(reason_code));
+
+		ieee80211_destroy_assoc_data(sdata, false);
+
+		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+		return;
+	}
 }
2713 2779
2714 2780
@@ -2788,34 +2854,6 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
2788 } 2854 }
2789} 2855}
2790 2856
2791static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
2792 bool assoc)
2793{
2794 struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
2795
2796 sdata_assert_lock(sdata);
2797
2798 if (!assoc) {
2799 /*
2800 * we are not associated yet, the only timer that could be
2801 * running is the timeout for the association response,
2802 * which is not relevant anymore.
2803 */
2804 del_timer_sync(&sdata->u.mgd.timer);
2805 sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
2806
2807 eth_zero_addr(sdata->u.mgd.bssid);
2808 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
2809 sdata->u.mgd.flags = 0;
2810 mutex_lock(&sdata->local->mtx);
2811 ieee80211_vif_release_channel(sdata);
2812 mutex_unlock(&sdata->local->mtx);
2813 }
2814
2815 kfree(assoc_data);
2816 sdata->u.mgd.assoc_data = NULL;
2817}
2818
2819static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, 2857static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
2820 struct cfg80211_bss *cbss, 2858 struct cfg80211_bss *cbss,
2821 struct ieee80211_mgmt *mgmt, size_t len) 2859 struct ieee80211_mgmt *mgmt, size_t len)
@@ -3299,7 +3337,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3299 } 3337 }
3300 ifmgd->have_beacon = true; 3338 ifmgd->have_beacon = true;
3301 ifmgd->assoc_data->need_beacon = false; 3339 ifmgd->assoc_data->need_beacon = false;
3302 if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { 3340 if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
3303 sdata->vif.bss_conf.sync_tsf = 3341 sdata->vif.bss_conf.sync_tsf =
3304 le64_to_cpu(mgmt->u.beacon.timestamp); 3342 le64_to_cpu(mgmt->u.beacon.timestamp);
3305 sdata->vif.bss_conf.sync_device_ts = 3343 sdata->vif.bss_conf.sync_device_ts =
@@ -3405,7 +3443,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3405 len - baselen, false, &elems, 3443 len - baselen, false, &elems,
3406 care_about_ies, ncrc); 3444 care_about_ies, ncrc);
3407 3445
3408 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { 3446 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) {
3409 bool directed_tim = ieee80211_check_tim(elems.tim, 3447 bool directed_tim = ieee80211_check_tim(elems.tim,
3410 elems.tim_len, 3448 elems.tim_len,
3411 ifmgd->aid); 3449 ifmgd->aid);
@@ -3473,7 +3511,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3473 * the driver will use them. The synchronized view is currently 3511 * the driver will use them. The synchronized view is currently
3474 * guaranteed only in certain callbacks. 3512 * guaranteed only in certain callbacks.
3475 */ 3513 */
3476 if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { 3514 if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
3477 sdata->vif.bss_conf.sync_tsf = 3515 sdata->vif.bss_conf.sync_tsf =
3478 le64_to_cpu(mgmt->u.beacon.timestamp); 3516 le64_to_cpu(mgmt->u.beacon.timestamp);
3479 sdata->vif.bss_conf.sync_device_ts = 3517 sdata->vif.bss_conf.sync_device_ts =
@@ -3711,7 +3749,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
3711 auth_data->expected_transaction = trans; 3749 auth_data->expected_transaction = trans;
3712 } 3750 }
3713 3751
3714 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 3752 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
3715 tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | 3753 tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
3716 IEEE80211_TX_INTFL_MLME_CONN_TX; 3754 IEEE80211_TX_INTFL_MLME_CONN_TX;
3717 3755
@@ -3784,7 +3822,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
3784 IEEE80211_ASSOC_MAX_TRIES); 3822 IEEE80211_ASSOC_MAX_TRIES);
3785 ieee80211_send_assoc(sdata); 3823 ieee80211_send_assoc(sdata);
3786 3824
3787 if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { 3825 if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
3788 assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; 3826 assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
3789 assoc_data->timeout_started = true; 3827 assoc_data->timeout_started = true;
3790 run_again(sdata, assoc_data->timeout); 3828 run_again(sdata, assoc_data->timeout);
@@ -3898,7 +3936,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3898 3936
3899 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); 3937 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
3900 3938
3901 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 3939 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
3902 max_tries = max_nullfunc_tries; 3940 max_tries = max_nullfunc_tries;
3903 else 3941 else
3904 max_tries = max_probe_tries; 3942 max_tries = max_probe_tries;
@@ -3923,7 +3961,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3923 } 3961 }
3924 } else if (time_is_after_jiffies(ifmgd->probe_timeout)) 3962 } else if (time_is_after_jiffies(ifmgd->probe_timeout))
3925 run_again(sdata, ifmgd->probe_timeout); 3963 run_again(sdata, ifmgd->probe_timeout);
3926 else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { 3964 else if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
3927 mlme_dbg(sdata, 3965 mlme_dbg(sdata,
3928 "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", 3966 "Failed to send nullfunc to AP %pM after %dms, disconnecting\n",
3929 bssid, probe_wait_ms); 3967 bssid, probe_wait_ms);
@@ -3992,14 +4030,11 @@ static void ieee80211_sta_monitor_work(struct work_struct *work)
3992 4030
3993static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) 4031static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
3994{ 4032{
3995 u32 flags;
3996
3997 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 4033 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
3998 __ieee80211_stop_poll(sdata); 4034 __ieee80211_stop_poll(sdata);
3999 4035
4000 /* let's probe the connection once */ 4036 /* let's probe the connection once */
4001 flags = sdata->local->hw.flags; 4037 if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
4002 if (!(flags & IEEE80211_HW_CONNECTION_MONITOR))
4003 ieee80211_queue_work(&sdata->local->hw, 4038 ieee80211_queue_work(&sdata->local->hw,
4004 &sdata->u.mgd.monitor_work); 4039 &sdata->u.mgd.monitor_work);
4005 /* and do all the other regular work too */ 4040 /* and do all the other regular work too */
@@ -4307,15 +4342,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
4307} 4342}
4308 4343
4309static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, 4344static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
4310 struct cfg80211_bss *cbss, bool assoc) 4345 struct cfg80211_bss *cbss, bool assoc,
4346 bool override)
4311{ 4347{
4312 struct ieee80211_local *local = sdata->local; 4348 struct ieee80211_local *local = sdata->local;
4313 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 4349 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
4314 struct ieee80211_bss *bss = (void *)cbss->priv; 4350 struct ieee80211_bss *bss = (void *)cbss->priv;
4315 struct sta_info *new_sta = NULL; 4351 struct sta_info *new_sta = NULL;
4316 struct ieee80211_supported_band *sband; 4352 struct ieee80211_supported_band *sband;
4317 struct ieee80211_sta_ht_cap sta_ht_cap; 4353 bool have_sta = false;
4318 bool have_sta = false, is_override = false;
4319 int err; 4354 int err;
4320 4355
4321 sband = local->hw.wiphy->bands[cbss->channel->band]; 4356 sband = local->hw.wiphy->bands[cbss->channel->band];
@@ -4335,14 +4370,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
4335 return -ENOMEM; 4370 return -ENOMEM;
4336 } 4371 }
4337 4372
4338 memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); 4373 if (new_sta || override) {
4339 ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
4340
4341 is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) !=
4342 (sband->ht_cap.cap &
-					 IEEE80211_HT_CAP_SUP_WIDTH_20_40);
-
-	if (new_sta || is_override) {
 		err = ieee80211_prep_channel(sdata, cbss);
 		if (err) {
 			if (new_sta)
@@ -4419,8 +4447,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 			sdata->vif.bss_conf.sync_dtim_count = tim_ie[2];
 		else
 			sdata->vif.bss_conf.sync_dtim_count = 0;
-	} else if (!(local->hw.flags &
-		     IEEE80211_HW_TIMING_BEACON_ONLY)) {
+	} else if (!ieee80211_hw_check(&sdata->local->hw,
+				       TIMING_BEACON_ONLY)) {
 		ies = rcu_dereference(cbss->proberesp_ies);
 		/* must be non-NULL since beacon IEs were NULL */
 		sdata->vif.bss_conf.sync_tsf = ies->tsf;
@@ -4552,7 +4580,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 
 	sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
 
-	err = ieee80211_prep_connection(sdata, req->bss, false);
+	err = ieee80211_prep_connection(sdata, req->bss, false, false);
 	if (err)
 		goto err_clear;
 
@@ -4570,6 +4598,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 	eth_zero_addr(ifmgd->bssid);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
 	ifmgd->auth_data = NULL;
+	mutex_lock(&sdata->local->mtx);
+	ieee80211_vif_release_channel(sdata);
+	mutex_unlock(&sdata->local->mtx);
 err_free:
 	kfree(auth_data);
 	return err;
@@ -4624,6 +4655,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband;
 	const u8 *ssidie, *ht_ie, *vht_ie;
 	int i, err;
+	bool override = false;
 
 	assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL);
 	if (!assoc_data)
@@ -4728,14 +4760,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	if (req->flags & ASSOC_REQ_DISABLE_HT) {
-		ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
-		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
-	}
-
-	if (req->flags & ASSOC_REQ_DISABLE_VHT)
-		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
-
 	/* Also disable HT if we don't support it or the AP doesn't use WMM */
 	sband = local->hw.wiphy->bands[req->bss->channel->band];
 	if (!sband->ht_cap.ht_supported ||
@@ -4802,7 +4826,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
-		 (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK),
+		 ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK),
		 "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"))
 		sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD;
 
@@ -4847,14 +4871,43 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	ifmgd->dtim_period = 0;
 	ifmgd->have_beacon = false;
 
-	err = ieee80211_prep_connection(sdata, req->bss, true);
+	/* override HT/VHT configuration only if the AP and we support it */
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+		struct ieee80211_sta_ht_cap sta_ht_cap;
+
+		if (req->flags & ASSOC_REQ_DISABLE_HT)
+			override = true;
+
+		memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
+		ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+
+		/* check for 40 MHz disable override */
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ) &&
+		    sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
+		    !(sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))
+			override = true;
+
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+		    req->flags & ASSOC_REQ_DISABLE_VHT)
+			override = true;
+	}
+
+	if (req->flags & ASSOC_REQ_DISABLE_HT) {
+		ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
+		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+	}
+
+	if (req->flags & ASSOC_REQ_DISABLE_VHT)
+		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
+	err = ieee80211_prep_connection(sdata, req->bss, true, override);
 	if (err)
 		goto err_clear;
 
 	rcu_read_lock();
 	beacon_ies = rcu_dereference(req->bss->beacon_ies);
 
-	if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC &&
+	if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC) &&
 	    !beacon_ies) {
 		/*
 		 * Wait up to one beacon interval ...
@@ -4881,7 +4934,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	assoc_data->timeout = jiffies;
 	assoc_data->timeout_started = true;
 
-	if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
+	if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
 		sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf;
 		sdata->vif.bss_conf.sync_device_ts =
 			bss->device_ts_beacon;
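
The recurring `hw.flags & IEEE80211_HW_FOO` -> `ieee80211_hw_check(&local->hw, FOO)` rewrites in the hunks above (and throughout the files below) come from converting the hardware capability flags from a single bitmask word into a bitmap queried through an accessor, so the flag space can grow past 32 bits. A minimal sketch of that accessor pattern; the `example_*` names are illustrative stand-ins for the real definitions in include/net/mac80211.h:

    #include <linux/bitops.h>

    enum example_hw_flags {
            EXAMPLE_HW_HAS_RATE_CONTROL,
            EXAMPLE_HW_SIGNAL_DBM,
            /* one enum value per former bit flag */
            NUM_EXAMPLE_HW_FLAGS
    };

    struct example_hw {
            /* bitmap sized from the enum, not a fixed u32 */
            unsigned long flags[BITS_TO_LONGS(NUM_EXAMPLE_HW_FLAGS)];
    };

    static inline bool _example_hw_check(struct example_hw *hw,
                                         enum example_hw_flags flg)
    {
            return test_bit(flg, hw->flags);
    }

    /* token pasting lets call sites name just the short flag suffix */
    #define example_hw_check(hw, flg) _example_hw_check(hw, EXAMPLE_HW_##flg)
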
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 683f0e3cb124..f2c75cf491fc 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -46,7 +46,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
 	}
 
 	if (!local->offchannel_ps_enabled ||
-	    !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))
+	    !ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
 		/*
 		 * If power save was enabled, no need to send a nullfunc
 		 * frame because AP knows that we are sleeping. But if the
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index ac6ad6238e3a..06b60980c62c 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -23,7 +23,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 
 	ieee80211_del_virtual_monitor(local);
 
-	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+	if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
 		mutex_lock(&local->sta_mtx);
 		list_for_each_entry(sta, &local->sta_list, list) {
 			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
@@ -82,7 +82,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 	if (err < 0) {
 		local->quiescing = false;
 		local->wowlan = false;
-		if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+		if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
 			mutex_lock(&local->sta_mtx);
 			list_for_each_entry(sta,
 					    &local->sta_list, list) {
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d53355b011f5..fda33f961d83 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -103,7 +103,7 @@ ieee80211_rate_control_ops_get(const char *name)
 	const struct rate_control_ops *ops;
 	const char *alg_name;
 
-	kparam_block_sysfs_write(ieee80211_default_rc_algo);
+	kernel_param_lock(THIS_MODULE);
 	if (!name)
 		alg_name = ieee80211_default_rc_algo;
 	else
@@ -117,7 +117,7 @@ ieee80211_rate_control_ops_get(const char *name)
 	/* try built-in one if specific alg requested but not found */
 	if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
 		ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
-	kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
+	kernel_param_unlock(THIS_MODULE);
 
 	return ops;
 }
@@ -680,12 +680,18 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
 		info->control.rates[i].count = 0;
 	}
 
-	if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
+	if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL))
 		return;
 
-	ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+	if (ista) {
+		spin_lock_bh(&sta->rate_ctrl_lock);
+		ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+		spin_unlock_bh(&sta->rate_ctrl_lock);
+	} else {
+		ref->ops->get_rate(ref->priv, NULL, NULL, txrc);
+	}
 
-	if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE)
+	if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE))
 		return;
 
 	ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb,
@@ -727,7 +733,7 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
 	if (local->open_count)
 		return -EBUSY;
 
-	if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) {
+	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
 		if (WARN_ON(!local->ops->set_rts_threshold))
 			return -EINVAL;
 		return 0;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 38652f09feaf..25c9be5dd7fd 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -42,10 +42,12 @@ static inline void rate_control_tx_status(struct ieee80211_local *local,
 	if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
 		return;
 
+	spin_lock_bh(&sta->rate_ctrl_lock);
 	if (ref->ops->tx_status)
 		ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb);
 	else
 		ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
+	spin_unlock_bh(&sta->rate_ctrl_lock);
 }
 
 static inline void
@@ -64,7 +66,9 @@ rate_control_tx_status_noskb(struct ieee80211_local *local,
 	if (WARN_ON_ONCE(!ref->ops->tx_status_noskb))
 		return;
 
+	spin_lock_bh(&sta->rate_ctrl_lock);
 	ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
+	spin_unlock_bh(&sta->rate_ctrl_lock);
 }
 
 static inline void rate_control_rate_init(struct sta_info *sta)
@@ -91,8 +95,10 @@ static inline void rate_control_rate_init(struct sta_info *sta)
 
 	sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
 
+	spin_lock_bh(&sta->rate_ctrl_lock);
 	ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
 			    priv_sta);
+	spin_unlock_bh(&sta->rate_ctrl_lock);
 	rcu_read_unlock();
 	set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
 }
@@ -115,18 +121,20 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
 			return;
 		}
 
+		spin_lock_bh(&sta->rate_ctrl_lock);
 		ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
 				      ista, priv_sta, changed);
+		spin_unlock_bh(&sta->rate_ctrl_lock);
 		rcu_read_unlock();
 	}
 	drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
 }
 
 static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
-					   struct ieee80211_sta *sta,
-					   gfp_t gfp)
+					   struct sta_info *sta, gfp_t gfp)
 {
-	return ref->ops->alloc_sta(ref->priv, sta, gfp);
+	spin_lock_init(&sta->rate_ctrl_lock);
+	return ref->ops->alloc_sta(ref->priv, &sta->sta, gfp);
 }
 
 static inline void rate_control_free_sta(struct sta_info *sta)
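
The rate.h changes above funnel every rate-control callback (init, update, TX status, get_rate) through a new per-station rate_ctrl_lock, so algorithm-private state no longer needs its own locking. A self-contained sketch of the same serialization pattern, with illustrative names (rc_sta, priv_state) that are not part of the patch:

    #include <linux/spinlock.h>

    struct rc_sta {
            spinlock_t rate_ctrl_lock;  /* serializes all algorithm callbacks */
            long priv_state;            /* algorithm-private, lock-protected */
    };

    static void rc_sta_init(struct rc_sta *rc)
    {
            /* mirrors the spin_lock_init() added to rate_control_alloc_sta() */
            spin_lock_init(&rc->rate_ctrl_lock);
            rc->priv_state = 0;
    }

    static void rc_sta_tx_status(struct rc_sta *rc, bool acked)
    {
            /* _bh variant: TX status and the TX path run in softirq context */
            spin_lock_bh(&rc->rate_ctrl_lock);
            rc->priv_state += acked ? 1 : -1;
            spin_unlock_bh(&rc->rate_ctrl_lock);
    }
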
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 7430a1df2ab1..543b67233535 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1070,7 +1070,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 	if (sband->band != IEEE80211_BAND_2GHZ)
 		return;
 
-	if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES))
+	if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
 		return;
 
 	mi->cck_supported = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5793f75c5ffd..5dae166cb7f5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -32,6 +32,16 @@
 #include "wme.h"
 #include "rate.h"
 
+static inline void ieee80211_rx_stats(struct net_device *dev, u32 len)
+{
+	struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->rx_packets++;
+	tstats->rx_bytes += len;
+	u64_stats_update_end(&tstats->syncp);
+}
+
 /*
  * monitor mode reception
  *
@@ -42,7 +52,7 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
 					   struct sk_buff *skb,
 					   unsigned int rtap_vendor_space)
 {
-	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) {
+	if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
 		if (likely(skb->len > FCS_LEN))
 			__pskb_trim(skb, skb->len - FCS_LEN);
 		else {
@@ -100,7 +110,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 		len = ALIGN(len, 8);
 		len += 8;
 	}
-	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+	if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
 		len += 1;
 
 	/* antenna field, if we don't have per-chain info */
@@ -175,7 +185,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	}
 
 	mpdulen = skb->len;
-	if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)))
+	if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)))
 		mpdulen += FCS_LEN;
 
 	rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
@@ -229,7 +239,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	}
 
 	/* IEEE80211_RADIOTAP_FLAGS */
-	if (has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS))
+	if (has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))
 		*pos |= IEEE80211_RADIOTAP_F_FCS;
 	if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
 		*pos |= IEEE80211_RADIOTAP_F_BADFCS;
@@ -279,7 +289,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	pos += 2;
 
 	/* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */
-	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM &&
+	if (ieee80211_hw_check(&local->hw, SIGNAL_DBM) &&
 	    !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
 		*pos = status->signal;
 		rthdr->it_present |=
@@ -448,7 +458,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	 * the SKB because it has a bad FCS/PLCP checksum.
 	 */
 
-	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
+	if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))
 		present_fcs_len = FCS_LEN;
 
 	/* ensure hdr->frame_control and vendor radiotap data are in skb head */
@@ -529,8 +539,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		}
 
 		prev_dev = sdata->dev;
-		sdata->dev->stats.rx_packets++;
-		sdata->dev->stats.rx_bytes += skb->len;
+		ieee80211_rx_stats(sdata->dev, skb->len);
 	}
 
 	if (prev_dev) {
@@ -981,7 +990,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_local *local = rx->local;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct sta_info *sta = rx->sta;
 	struct tid_ampdu_rx *tid_agg_rx;
 	u16 sc;
@@ -1016,10 +1024,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	    ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
 		goto dont_reorder;
 
-	/* not actually part of this BA session */
-	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
-		goto dont_reorder;
-
 	/* new, potentially un-ordered, ampdu frame - process it */
 
 	/* reset session timer */
@@ -1073,10 +1077,8 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
 	if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
 		     rx->sta->last_seq_ctrl[rx->seqno_idx] ==
 		     hdr->seq_ctrl)) {
-		if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
-			rx->local->dot11FrameDuplicateCount++;
-			rx->sta->num_duplicates++;
-		}
+		I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
+		rx->sta->num_duplicates++;
 		return RX_DROP_UNUSABLE;
 	} else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
 		rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
@@ -1195,11 +1197,13 @@ static void sta_ps_start(struct sta_info *sta)
 
 	atomic_inc(&ps->num_sta_ps);
 	set_sta_flag(sta, WLAN_STA_PS_STA);
-	if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
+	if (!ieee80211_hw_check(&local->hw, AP_LINK_PS))
 		drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
 	ps_dbg(sdata, "STA %pM aid %d enters power save mode\n",
 	       sta->sta.addr, sta->sta.aid);
 
+	ieee80211_clear_fast_xmit(sta);
+
 	if (!sta->sta.txq[0])
 		return;
 
@@ -1241,7 +1245,7 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start)
 	struct sta_info *sta_inf = container_of(sta, struct sta_info, sta);
 	bool in_ps;
 
-	WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS));
+	WARN_ON(!ieee80211_hw_check(&sta_inf->local->hw, AP_LINK_PS));
 
 	/* Don't let the same PS state be set twice */
 	in_ps = test_sta_flag(sta_inf, WLAN_STA_PS_STA);
@@ -1265,7 +1269,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
 	int tid, ac;
 
-	if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH))
+	if (!rx->sta)
 		return RX_CONTINUE;
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP &&
@@ -1277,7 +1281,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
 	 * uAPSD and PS-Poll frames (the latter shouldn't even come up from
 	 * it to mac80211 since they're handled.)
 	 */
-	if (sdata->local->hw.flags & IEEE80211_HW_AP_LINK_PS)
+	if (ieee80211_hw_check(&sdata->local->hw, AP_LINK_PS))
 		return RX_CONTINUE;
 
 	/*
@@ -1367,11 +1371,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 			}
 		}
 	} else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) {
-		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
-						NL80211_IFTYPE_OCB);
-		/* OCB uses wild-card BSSID */
-		if (is_broadcast_ether_addr(bssid))
-			sta->last_rx = jiffies;
+		sta->last_rx = jiffies;
 	} else if (!is_multicast_ether_addr(hdr->addr1)) {
 		/*
 		 * Mesh beacons will update last_rx when if they are found to
@@ -1386,9 +1386,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 		}
 	}
 
-	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
-		return RX_CONTINUE;
-
 	if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
 		ieee80211_sta_rx_notify(rx->sdata, hdr);
 
@@ -1416,7 +1413,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 * Change STA power saving mode only at the end of a frame
 	 * exchange sequence.
 	 */
-	if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) &&
+	if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
 	    !ieee80211_has_morefrags(hdr->frame_control) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
 	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
@@ -1517,13 +1514,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	 * possible.
 	 */
 
-	/*
-	 * No point in finding a key and decrypting if the frame is neither
-	 * addressed to us nor a multicast frame.
-	 */
-	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
-		return RX_CONTINUE;
-
 	/* start without a key */
 	rx->key = NULL;
 	fc = hdr->frame_control;
@@ -1795,7 +1785,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	frag = sc & IEEE80211_SCTL_FRAG;
 
 	if (is_multicast_ether_addr(hdr->addr1)) {
-		rx->local->dot11MulticastReceivedFrameCount++;
+		I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
 		goto out_no_led;
 	}
 
@@ -1878,7 +1868,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
 	rx->skb = __skb_dequeue(&entry->skb_list);
 	if (skb_tailroom(rx->skb) < entry->extra_len) {
-		I802_DEBUG_INC(rx->local->rx_expand_skb_head2);
+		I802_DEBUG_INC(rx->local->rx_expand_skb_head_defrag);
 		if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len,
 					      GFP_ATOMIC))) {
 			I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
@@ -2054,18 +2044,15 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 	struct sk_buff *skb, *xmit_skb;
 	struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
 	struct sta_info *dsta;
-	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
-
-	dev->stats.rx_packets++;
-	dev->stats.rx_bytes += rx->skb->len;
 
 	skb = rx->skb;
 	xmit_skb = NULL;
 
+	ieee80211_rx_stats(dev, skb->len);
+
 	if ((sdata->vif.type == NL80211_IFTYPE_AP ||
 	     sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
 	    !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
-	    (status->rx_flags & IEEE80211_RX_RA_MATCH) &&
 	    (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
 		if (is_multicast_ether_addr(ehdr->h_dest)) {
 			/*
@@ -2207,7 +2194,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	struct sk_buff *skb = rx->skb, *fwd_skb;
 	struct ieee80211_local *local = rx->local;
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
-	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	u16 q, hdrlen;
 
@@ -2238,8 +2224,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	    mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr))
 		return RX_DROP_MONITOR;
 
-	if (!ieee80211_is_data(hdr->frame_control) ||
-	    !(status->rx_flags & IEEE80211_RX_RA_MATCH))
+	if (!ieee80211_is_data(hdr->frame_control))
 		return RX_CONTINUE;
 
 	if (!mesh_hdr->ttl)
@@ -2330,11 +2315,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
 		ieee80211_add_pending_skb(local, fwd_skb);
 out:
-	if (is_multicast_ether_addr(hdr->addr1) ||
-	    sdata->dev->flags & IFF_PROMISC)
+	if (is_multicast_ether_addr(hdr->addr1))
 		return RX_CONTINUE;
-	else
-		return RX_DROP_MONITOR;
+	return RX_DROP_MONITOR;
 }
 #endif
 
@@ -2445,6 +2428,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 	struct {
 		__le16 control, start_seq_num;
 	} __packed bar_data;
+	struct ieee80211_event event = {
+		.type = BAR_RX_EVENT,
+	};
 
 	if (!rx->sta)
 		return RX_DROP_MONITOR;
@@ -2460,6 +2446,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 		return RX_DROP_MONITOR;
 
 	start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4;
+	event.u.ba.tid = tid;
+	event.u.ba.ssn = start_seq_num;
+	event.u.ba.sta = &rx->sta->sta;
 
 	/* reset session timer */
 	if (tid_agg_rx->timeout)
@@ -2472,6 +2461,8 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 					 start_seq_num, frames);
 	spin_unlock(&tid_agg_rx->reorder_lock);
 
+	drv_event_callback(rx->local, rx->sdata, &event);
+
 	kfree_skb(skb);
 	return RX_QUEUED;
 }
@@ -2552,7 +2543,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
 	    !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
 		int sig = 0;
 
-		if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+		if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
 			sig = status->signal;
 
 		cfg80211_report_obss_beacon(rx->local->hw.wiphy,
@@ -2561,9 +2552,6 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
 		rx->flags |= IEEE80211_RX_BEACON_REPORTED;
 	}
 
-	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
-		return RX_DROP_MONITOR;
-
 	if (ieee80211_drop_unencrypted_mgmt(rx))
 		return RX_DROP_UNUSABLE;
 
@@ -2591,9 +2579,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	    mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT)
 		return RX_DROP_UNUSABLE;
 
-	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
-		return RX_DROP_UNUSABLE;
-
 	switch (mgmt->u.action.category) {
 	case WLAN_CATEGORY_HT:
 		/* reject HT action frames from stations not supporting HT */
@@ -2889,7 +2874,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
 	 * it transmitted were processed or returned.
 	 */
 
-	if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+	if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
 		sig = status->signal;
 
 	if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
@@ -2954,7 +2939,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
 			info->flags = IEEE80211_TX_CTL_TX_OFFCHAN |
 				      IEEE80211_TX_INTFL_OFFCHAN_TX_OK |
 				      IEEE80211_TX_CTL_NO_CCK_RATE;
-			if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+			if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
 				info->hw_queue =
 					local->hw.offchannel_tx_hw_queue;
 		}
@@ -3077,8 +3062,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
 		}
 
 		prev_dev = sdata->dev;
-		sdata->dev->stats.rx_packets++;
-		sdata->dev->stats.rx_bytes += skb->len;
+		ieee80211_rx_stats(sdata->dev, skb->len);
 	}
 
 	if (prev_dev) {
@@ -3246,16 +3230,25 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
 	ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames);
 	spin_unlock(&tid_agg_rx->reorder_lock);
 
+	if (!skb_queue_empty(&frames)) {
+		struct ieee80211_event event = {
+			.type = BA_FRAME_TIMEOUT,
+			.u.ba.tid = tid,
+			.u.ba.sta = &sta->sta,
+		};
+		drv_event_callback(rx.local, rx.sdata, &event);
+	}
+
 	ieee80211_rx_handlers(&rx, &frames);
 }
 
 /* main receive path */
 
-static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
-				 struct ieee80211_hdr *hdr)
+static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
 	struct sk_buff *skb = rx->skb;
+	struct ieee80211_hdr *hdr = (void *)skb->data;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
 	int multicast = is_multicast_ether_addr(hdr->addr1);
@@ -3264,30 +3257,23 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
 	case NL80211_IFTYPE_STATION:
 		if (!bssid && !sdata->u.mgd.use_4addr)
 			return false;
-		if (!multicast &&
-		    !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
-			if (!(sdata->dev->flags & IFF_PROMISC) ||
-			    sdata->u.mgd.use_4addr)
-				return false;
-			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
-		}
-		break;
+		if (multicast)
+			return true;
+		return ether_addr_equal(sdata->vif.addr, hdr->addr1);
 	case NL80211_IFTYPE_ADHOC:
 		if (!bssid)
 			return false;
 		if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
 		    ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
 			return false;
-		if (ieee80211_is_beacon(hdr->frame_control)) {
+		if (ieee80211_is_beacon(hdr->frame_control))
 			return true;
-		} else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
+		if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid))
 			return false;
-		} else if (!multicast &&
-			   !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
-			if (!(sdata->dev->flags & IFF_PROMISC))
-				return false;
-			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
-		} else if (!rx->sta) {
+		if (!multicast &&
+		    !ether_addr_equal(sdata->vif.addr, hdr->addr1))
+			return false;
+		if (!rx->sta) {
 			int rate_idx;
 			if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
 				rate_idx = 0; /* TODO: HT/VHT rates */
@@ -3296,25 +3282,18 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
 			ieee80211_ibss_rx_no_sta(sdata, bssid, hdr->addr2,
 						 BIT(rate_idx));
 		}
-		break;
+		return true;
 	case NL80211_IFTYPE_OCB:
 		if (!bssid)
 			return false;
-		if (ieee80211_is_beacon(hdr->frame_control)) {
+		if (ieee80211_is_beacon(hdr->frame_control))
 			return false;
-		} else if (!is_broadcast_ether_addr(bssid)) {
-			ocb_dbg(sdata, "BSSID mismatch in OCB mode!\n");
+		if (!is_broadcast_ether_addr(bssid))
 			return false;
-		} else if (!multicast &&
-			   !ether_addr_equal(sdata->dev->dev_addr,
-					     hdr->addr1)) {
-			/* if we are in promisc mode we also accept
-			 * packets not destined for us
-			 */
-			if (!(sdata->dev->flags & IFF_PROMISC))
-				return false;
-			rx->flags &= ~IEEE80211_RX_RA_MATCH;
-		} else if (!rx->sta) {
+		if (!multicast &&
+		    !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1))
+			return false;
+		if (!rx->sta) {
 			int rate_idx;
 			if (status->flag & RX_FLAG_HT)
 				rate_idx = 0; /* TODO: HT rates */
@@ -3323,22 +3302,17 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
 			ieee80211_ocb_rx_no_sta(sdata, bssid, hdr->addr2,
 						BIT(rate_idx));
 		}
-		break;
+		return true;
 	case NL80211_IFTYPE_MESH_POINT:
-		if (!multicast &&
-		    !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
-			if (!(sdata->dev->flags & IFF_PROMISC))
-				return false;
-
-			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
-		}
-		break;
+		if (multicast)
+			return true;
+		return ether_addr_equal(sdata->vif.addr, hdr->addr1);
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_AP:
-		if (!bssid) {
-			if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
-				return false;
-		} else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
+		if (!bssid)
+			return ether_addr_equal(sdata->vif.addr, hdr->addr1);
+
+		if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
 			/*
 			 * Accept public action frames even when the
 			 * BSSID doesn't match, this is used for P2P
@@ -3350,10 +3324,10 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
 				return false;
 			if (ieee80211_is_public_action(hdr, skb->len))
 				return true;
-			if (!ieee80211_is_beacon(hdr->frame_control))
-				return false;
-			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
-		} else if (!ieee80211_has_tods(hdr->frame_control)) {
+			return ieee80211_is_beacon(hdr->frame_control);
+		}
+
+		if (!ieee80211_has_tods(hdr->frame_control)) {
 			/* ignore data frames to TDLS-peers */
 			if (ieee80211_is_data(hdr->frame_control))
 				return false;
@@ -3362,30 +3336,22 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
 			    !ether_addr_equal(bssid, hdr->addr1))
 				return false;
 		}
-		break;
+		return true;
 	case NL80211_IFTYPE_WDS:
 		if (bssid || !ieee80211_is_data(hdr->frame_control))
 			return false;
-		if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2))
-			return false;
-		break;
+		return ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2);
 	case NL80211_IFTYPE_P2P_DEVICE:
-		if (!ieee80211_is_public_action(hdr, skb->len) &&
-		    !ieee80211_is_probe_req(hdr->frame_control) &&
-		    !ieee80211_is_probe_resp(hdr->frame_control) &&
-		    !ieee80211_is_beacon(hdr->frame_control))
-			return false;
-		if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) &&
-		    !multicast)
-			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
-		break;
+		return ieee80211_is_public_action(hdr, skb->len) ||
+		       ieee80211_is_probe_req(hdr->frame_control) ||
+		       ieee80211_is_probe_resp(hdr->frame_control) ||
+		       ieee80211_is_beacon(hdr->frame_control);
 	default:
-		/* should never get here */
-		WARN_ON_ONCE(1);
 		break;
 	}
 
-	return true;
+	WARN_ON_ONCE(1);
+	return false;
 }
 
 /*
@@ -3399,13 +3365,10 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
 {
 	struct ieee80211_local *local = rx->local;
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
-	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
-	struct ieee80211_hdr *hdr = (void *)skb->data;
 
 	rx->skb = skb;
-	status->rx_flags |= IEEE80211_RX_RA_MATCH;
 
-	if (!prepare_for_handlers(rx, hdr))
+	if (!ieee80211_accept_frame(rx))
 		return false;
 
 	if (!consume) {
@@ -3448,7 +3411,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	rx.local = local;
 
 	if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
-		local->dot11ReceivedFragmentCount++;
+		I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
 
 	if (ieee80211_is_mgmt(fc)) {
 		/* drop frame if too short for header */
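
The new ieee80211_rx_stats() helper above replaces the plain dev->stats counters with per-CPU pcpu_sw_netstats protected by a u64_stats_sync sequence counter, which avoids cache-line bouncing between CPUs and torn 64-bit reads on 32-bit machines. A hedged sketch of the matching reader side, the summing loop a stats query would run; example_read_rx_stats is illustrative, not a function added by this patch:

    #include <linux/netdevice.h>
    #include <linux/u64_stats_sync.h>

    static void example_read_rx_stats(struct net_device *dev,
                                      u64 *packets, u64 *bytes)
    {
            int cpu;

            *packets = 0;
            *bytes = 0;
            for_each_possible_cpu(cpu) {
                    const struct pcpu_sw_netstats *s = per_cpu_ptr(dev->tstats, cpu);
                    unsigned int start;
                    u64 p, b;

                    /* retry if a writer updated the counters meanwhile */
                    do {
                            start = u64_stats_fetch_begin_irq(&s->syncp);
                            p = s->rx_packets;
                            b = s->rx_bytes;
                    } while (u64_stats_fetch_retry_irq(&s->syncp, start));

                    *packets += p;
                    *bytes += b;
            }
    }
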
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 7bb6a9383f58..11d0901ebb7b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -6,7 +6,7 @@
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
- * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2013-2015  Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -69,10 +69,11 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	int clen, srlen;
 	enum nl80211_bss_scan_width scan_width;
 	s32 signal = 0;
+	bool signal_valid;
 
-	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+	if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
 		signal = rx_status->signal * 100;
-	else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
+	else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
 		signal = (rx_status->signal * 100) / local->hw.max_signal;
 
 	scan_width = NL80211_BSS_CHAN_WIDTH_20;
@@ -86,6 +87,11 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 				  GFP_ATOMIC);
 	if (!cbss)
 		return NULL;
+	/* In case the signal is invalid update the status */
+	signal_valid = abs(channel->center_freq - cbss->channel->center_freq)
+		<= local->hw.wiphy->max_adj_channel_rssi_comp;
+	if (!signal_valid)
+		rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
 
 	bss = (void *)cbss->priv;
 
@@ -257,7 +263,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
 	if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
 		return false;
 
-	if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) {
+	if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
 		for (i = 0; i < req->n_channels; i++) {
 			local->hw_scan_req->req.channels[i] = req->channels[i];
 			bands_used |= BIT(req->channels[i]->band);
@@ -326,7 +332,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 		return;
 
 	if (hw_scan && !aborted &&
-	    !(local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) &&
+	    !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) &&
 	    ieee80211_prep_hw_scan(local)) {
 		int rc;
 
@@ -520,7 +526,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 
 	local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len;
 
-	if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) {
+	if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
 		int i, n_bands = 0;
 		u8 bands_counted = 0;
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2880f2ae99ab..666ddac3c87c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -71,6 +71,7 @@ static const struct rhashtable_params sta_rht_params = {
 	.key_offset = offsetof(struct sta_info, sta.addr),
 	.key_len = ETH_ALEN,
 	.hashfn = sta_addr_hash,
+	.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
 };
 
 /* Caller must hold local->sta_mtx */
@@ -281,12 +282,12 @@ static void sta_deliver_ps_frames(struct work_struct *wk)
 static int sta_prepare_rate_control(struct ieee80211_local *local,
 				    struct sta_info *sta, gfp_t gfp)
 {
-	if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
+	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
 		return 0;
 
 	sta->rate_ctrl = local->rate_ctrl;
 	sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl,
-						     &sta->sta, gfp);
+						     sta, gfp);
 	if (!sta->rate_ctrl_priv)
 		return -ENOMEM;
 
@@ -312,6 +313,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
 	mutex_init(&sta->ampdu_mlme.mtx);
 #ifdef CONFIG_MAC80211_MESH
+	spin_lock_init(&sta->plink_lock);
 	if (ieee80211_vif_is_mesh(&sdata->vif) &&
 	    !sdata->u.mesh.user_mpm)
 		init_timer(&sta->plink_timer);
@@ -641,7 +643,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
 	}
 
 	/* No need to do anything if the driver does all */
-	if (local->hw.flags & IEEE80211_HW_AP_LINK_PS)
+	if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
 		return;
 
 	if (sta->dead)
@@ -1146,7 +1148,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	sta->driver_buffered_tids = 0;
 	sta->txq_buffered_tids = 0;
 
-	if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
+	if (!ieee80211_hw_check(&local->hw, AP_LINK_PS))
 		drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
 
 	if (sta->sta.txq[0]) {
@@ -1217,6 +1219,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	ps_dbg(sdata,
 	       "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n",
 	       sta->sta.addr, sta->sta.aid, filtered, buffered);
+
+	ieee80211_check_fast_xmit(sta);
 }
 
 static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
@@ -1615,6 +1619,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 
 	if (block) {
 		set_sta_flag(sta, WLAN_STA_PS_DRIVER);
+		ieee80211_clear_fast_xmit(sta);
 		return;
 	}
@@ -1632,6 +1637,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 		ieee80211_queue_work(hw, &sta->drv_deliver_wk);
 	} else {
 		clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+		ieee80211_check_fast_xmit(sta);
 	}
 }
 EXPORT_SYMBOL(ieee80211_sta_block_awake);
@@ -1736,6 +1742,7 @@ int sta_info_move_state(struct sta_info *sta,
 			    !sta->sdata->u.vlan.sta))
 				atomic_dec(&sta->sdata->bss->num_mcast_sta);
 			clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+			ieee80211_clear_fast_xmit(sta);
 		}
 		break;
 	case IEEE80211_STA_AUTHORIZED:
@@ -1745,6 +1752,7 @@ int sta_info_move_state(struct sta_info *sta,
 			    !sta->sdata->u.vlan.sta))
 				atomic_inc(&sta->sdata->bss->num_mcast_sta);
 			set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+			ieee80211_check_fast_xmit(sta);
 		}
 		break;
 	default:
@@ -1871,8 +1879,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif);
 	}
 
-	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
-	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
+	if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
+	    ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
 		if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
 			sinfo->signal = (s8)sta->last_signal;
 			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
@@ -1924,7 +1932,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 		if (!(tidstats->filled &
 		      BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) &&
-		    local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+		    ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 			tidstats->filled |=
 				BIT(NL80211_TID_STATS_TX_MSDU_RETRIES);
 			tidstats->tx_msdu_retries = sta->tx_msdu_retries[i];
@@ -1932,7 +1940,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 		if (!(tidstats->filled &
 		      BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) &&
-		    local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+		    ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 			tidstats->filled |=
 				BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
 			tidstats->tx_msdu_failed = sta->tx_msdu_failed[i];
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c164fb3f6c5..226f8ca47ad6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -241,6 +241,34 @@ struct sta_ampdu_mlme {
241/* Value to indicate no TID reservation */ 241/* Value to indicate no TID reservation */
242#define IEEE80211_TID_UNRESERVED 0xff 242#define IEEE80211_TID_UNRESERVED 0xff
243 243
244#define IEEE80211_FAST_XMIT_MAX_IV 18
245
246/**
247 * struct ieee80211_fast_tx - TX fastpath information
248 * @key: key to use for hw crypto
249 * @hdr: the 802.11 header to put with the frame
250 * @hdr_len: actual 802.11 header length
251 * @sa_offs: offset of the SA
252 * @da_offs: offset of the DA
253 * @pn_offs: offset where to put PN for crypto (or 0 if not needed)
254 * @band: band this will be transmitted on, for tx_info
255 * @rcu_head: RCU head to free this struct
256 *
257 * This struct is small enough so that the common case (maximum crypto
258 * header length of 8 like for CCMP/GCMP) fits into a single 64-byte
259 * cache line.
260 */
261struct ieee80211_fast_tx {
262 struct ieee80211_key *key;
263 u8 hdr_len;
264 u8 sa_offs, da_offs, pn_offs;
265 u8 band;
266 u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
267 sizeof(rfc1042_header)];
268
269 struct rcu_head rcu_head;
270};
271
244/** 272/**
245 * struct sta_info - STA information 273 * struct sta_info - STA information
246 * 274 *
@@ -257,6 +285,8 @@ struct sta_ampdu_mlme {
257 * @gtk: group keys negotiated with this station, if any 285 * @gtk: group keys negotiated with this station, if any
258 * @gtk_idx: last installed group key index 286 * @gtk_idx: last installed group key index
259 * @rate_ctrl: rate control algorithm reference 287 * @rate_ctrl: rate control algorithm reference
288 * @rate_ctrl_lock: spinlock used to protect rate control data
289 * (data inside the algorithm, so serializes calls there)
260 * @rate_ctrl_priv: rate control private per-STA pointer 290 * @rate_ctrl_priv: rate control private per-STA pointer
261 * @last_tx_rate: rate used for last transmit, to report to userspace as 291 * @last_tx_rate: rate used for last transmit, to report to userspace as
262 * "the" transmit rate 292 * "the" transmit rate
@@ -295,10 +325,10 @@ struct sta_ampdu_mlme {
295 * @fail_avg: moving percentage of failed MSDUs 325 * @fail_avg: moving percentage of failed MSDUs
296 * @tx_packets: number of RX/TX MSDUs 326 * @tx_packets: number of RX/TX MSDUs
297 * @tx_bytes: number of bytes transmitted to this STA 327 * @tx_bytes: number of bytes transmitted to this STA
298 * @tx_fragments: number of transmitted MPDUs
299 * @tid_seq: per-TID sequence numbers for sending to this STA 328 * @tid_seq: per-TID sequence numbers for sending to this STA
300 * @ampdu_mlme: A-MPDU state machine state 329 * @ampdu_mlme: A-MPDU state machine state
301 * @timer_to_tid: identity mapping to ID timers 330 * @timer_to_tid: identity mapping to ID timers
331 * @plink_lock: serialize access to plink fields
302 * @llid: Local link ID 332 * @llid: Local link ID
303 * @plid: Peer link ID 333 * @plid: Peer link ID
304 * @reason: Cancel reason on PLINK_HOLDING state 334 * @reason: Cancel reason on PLINK_HOLDING state
@@ -338,6 +368,9 @@ struct sta_ampdu_mlme {
338 * using IEEE80211_NUM_TID entry for non-QoS frames 368 * using IEEE80211_NUM_TID entry for non-QoS frames
339 * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID 369 * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID
340 * entry for non-QoS frames 370 * entry for non-QoS frames
371 * @fast_tx: TX fastpath information
372 * @processed_beacon: set to true after peer rates and capabilities are
373 * processed
341 */ 374 */
342struct sta_info { 375struct sta_info {
343 /* General information, mostly static */ 376 /* General information, mostly static */
@@ -352,8 +385,11 @@ struct sta_info {
352 u8 ptk_idx; 385 u8 ptk_idx;
353 struct rate_control_ref *rate_ctrl; 386 struct rate_control_ref *rate_ctrl;
354 void *rate_ctrl_priv; 387 void *rate_ctrl_priv;
388 spinlock_t rate_ctrl_lock;
355 spinlock_t lock; 389 spinlock_t lock;
356 390
391 struct ieee80211_fast_tx __rcu *fast_tx;
392
357 struct work_struct drv_deliver_wk; 393 struct work_struct drv_deliver_wk;
358 394
359 u16 listen_interval; 395 u16 listen_interval;
@@ -400,7 +436,6 @@ struct sta_info {
400 unsigned int fail_avg; 436 unsigned int fail_avg;
401 437
402 /* Updated from TX path only, no locking requirements */ 438 /* Updated from TX path only, no locking requirements */
403 u32 tx_fragments;
404 u64 tx_packets[IEEE80211_NUM_ACS]; 439 u64 tx_packets[IEEE80211_NUM_ACS];
405 u64 tx_bytes[IEEE80211_NUM_ACS]; 440 u64 tx_bytes[IEEE80211_NUM_ACS];
406 struct ieee80211_tx_rate last_tx_rate; 441 struct ieee80211_tx_rate last_tx_rate;
@@ -422,9 +457,10 @@ struct sta_info {
422 457
423#ifdef CONFIG_MAC80211_MESH 458#ifdef CONFIG_MAC80211_MESH
424 /* 459 /*
425 * Mesh peer link attributes 460 * Mesh peer link attributes, protected by plink_lock.
426 * TODO: move to a sub-structure that is referenced with pointer? 461 * TODO: move to a sub-structure that is referenced with pointer?
427 */ 462 */
463 spinlock_t plink_lock;
428 u16 llid; 464 u16 llid;
429 u16 plid; 465 u16 plid;
430 u16 reason; 466 u16 reason;
@@ -432,12 +468,14 @@ struct sta_info {
432 enum nl80211_plink_state plink_state; 468 enum nl80211_plink_state plink_state;
433 u32 plink_timeout; 469 u32 plink_timeout;
434 struct timer_list plink_timer; 470 struct timer_list plink_timer;
471
435 s64 t_offset; 472 s64 t_offset;
436 s64 t_offset_setpoint; 473 s64 t_offset_setpoint;
437 /* mesh power save */ 474 /* mesh power save */
438 enum nl80211_mesh_power_mode local_pm; 475 enum nl80211_mesh_power_mode local_pm;
439 enum nl80211_mesh_power_mode peer_pm; 476 enum nl80211_mesh_power_mode peer_pm;
440 enum nl80211_mesh_power_mode nonpeer_pm; 477 enum nl80211_mesh_power_mode nonpeer_pm;
478 bool processed_beacon;
441#endif 479#endif
442 480
443#ifdef CONFIG_MAC80211_DEBUGFS 481#ifdef CONFIG_MAC80211_DEBUGFS
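
A note on the new sta_info fields: fast_tx is an RCU-managed cache pointer that is published and torn down under sta->lock, while rate_ctrl_lock and plink_lock carve dedicated locks out of what sta->lock used to cover. A minimal sketch of the publish/teardown pattern these fields imply (the real implementation is in the net/mac80211/tx.c hunks below; the function name here is illustrative):

/* Writers serialize on sta->lock, readers only hold rcu_read_lock();
 * the old cache entry is freed after a grace period. */
static void fast_tx_publish(struct sta_info *sta,
			    struct ieee80211_fast_tx *new)
{
	struct ieee80211_fast_tx *old;

	spin_lock_bh(&sta->lock);
	old = rcu_dereference_protected(sta->fast_tx,
					lockdep_is_held(&sta->lock));
	rcu_assign_pointer(sta->fast_tx, new);	/* publish to TX path */
	spin_unlock_bh(&sta->lock);

	if (old)
		kfree_rcu(old, rcu_head);	/* free after grace period */
}
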
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 005fdbe39a8b..45628f37c083 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -181,7 +181,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
181 struct ieee80211_local *local = sta->local; 181 struct ieee80211_local *local = sta->local;
182 struct ieee80211_sub_if_data *sdata = sta->sdata; 182 struct ieee80211_sub_if_data *sdata = sta->sdata;
183 183
184 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 184 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
185 sta->last_rx = jiffies; 185 sta->last_rx = jiffies;
186 186
187 if (ieee80211_is_data_qos(mgmt->frame_control)) { 187 if (ieee80211_is_data_qos(mgmt->frame_control)) {
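
The ieee80211_hw_check() conversions in this and the following hunks replace open-coded hw.flags tests; in this series the hardware capabilities outgrow a single flags word, so the helper is expected to reduce to a bitmap test along these lines (paraphrased, not quoted, from include/net/mac80211.h):

/* Assumed shape of the helper: the short flag name is token-pasted
 * onto the IEEE80211_HW_ prefix and tested in the capability bitmap. */
static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
				       enum ieee80211_hw_flags flg)
{
	return test_bit(flg, hw->flags);
}
#define ieee80211_hw_check(hw, flg) _ieee80211_hw_check(hw, IEEE80211_HW_##flg)
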
@@ -414,8 +414,7 @@ static void ieee80211_tdls_td_tx_handle(struct ieee80211_local *local,
414 414
415 if (is_teardown) { 415 if (is_teardown) {
416 /* This mechanism relies on being able to get ACKs */ 416 /* This mechanism relies on being able to get ACKs */
417 WARN_ON(!(local->hw.flags & 417 WARN_ON(!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS));
418 IEEE80211_HW_REPORTS_TX_ACK_STATUS));
419 418
420 /* Check if peer has ACKed */ 419 /* Check if peer has ACKed */
421 if (flags & IEEE80211_TX_STAT_ACK) { 420 if (flags & IEEE80211_TX_STAT_ACK) {
@@ -429,6 +428,74 @@ static void ieee80211_tdls_td_tx_handle(struct ieee80211_local *local,
429 } 428 }
430} 429}
431 430
431static struct ieee80211_sub_if_data *
432ieee80211_sdata_from_skb(struct ieee80211_local *local, struct sk_buff *skb)
433{
434 struct ieee80211_sub_if_data *sdata;
435
436 if (skb->dev) {
437 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
438 if (!sdata->dev)
439 continue;
440
441 if (skb->dev == sdata->dev)
442 return sdata;
443 }
444
445 return NULL;
446 }
447
448 return rcu_dereference(local->p2p_sdata);
449}
450
451static void ieee80211_report_ack_skb(struct ieee80211_local *local,
452 struct ieee80211_tx_info *info,
453 bool acked, bool dropped)
454{
455 struct sk_buff *skb;
456 unsigned long flags;
457
458 spin_lock_irqsave(&local->ack_status_lock, flags);
459 skb = idr_find(&local->ack_status_frames, info->ack_frame_id);
460 if (skb)
461 idr_remove(&local->ack_status_frames, info->ack_frame_id);
462 spin_unlock_irqrestore(&local->ack_status_lock, flags);
463
464 if (!skb)
465 return;
466
467 if (dropped) {
468 dev_kfree_skb_any(skb);
469 return;
470 }
471
472 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) {
473 u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie;
474 struct ieee80211_sub_if_data *sdata;
475 struct ieee80211_hdr *hdr = (void *)skb->data;
476
477 rcu_read_lock();
478 sdata = ieee80211_sdata_from_skb(local, skb);
479 if (sdata) {
480 if (ieee80211_is_nullfunc(hdr->frame_control) ||
481 ieee80211_is_qos_nullfunc(hdr->frame_control))
482 cfg80211_probe_status(sdata->dev, hdr->addr1,
483 cookie, acked,
484 GFP_ATOMIC);
485 else
486 cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
487 skb->data, skb->len,
488 acked, GFP_ATOMIC);
489 }
490 rcu_read_unlock();
491
492 dev_kfree_skb_any(skb);
493 } else {
494 /* consumes skb */
495 skb_complete_wifi_ack(skb, acked);
496 }
497}
498
432static void ieee80211_report_used_skb(struct ieee80211_local *local, 499static void ieee80211_report_used_skb(struct ieee80211_local *local,
433 struct sk_buff *skb, bool dropped) 500 struct sk_buff *skb, bool dropped)
434{ 501{
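
ieee80211_report_ack_skb() centralizes the ack_status_frames bookkeeping that used to live inline in ieee80211_report_used_skb(): info->ack_frame_id is an IDR id, and the lookup and removal must happen under ack_status_lock as one atomic claim so that a TX status report and a teardown cannot both end up owning the skb. The claim step, condensed into a sketch:

/* Sketch: atomically claim the tracked skb; whoever gets a non-NULL
 * return owns it and is responsible for completing or freeing it. */
static struct sk_buff *claim_ack_skb(struct ieee80211_local *local, u16 id)
{
	struct sk_buff *skb;
	unsigned long flags;

	spin_lock_irqsave(&local->ack_status_lock, flags);
	skb = idr_find(&local->ack_status_frames, id);
	if (skb)
		idr_remove(&local->ack_status_frames, id);
	spin_unlock_irqrestore(&local->ack_status_lock, flags);

	return skb;	/* NULL: never tracked, or already claimed */
}
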
@@ -439,28 +506,12 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
439 if (dropped) 506 if (dropped)
440 acked = false; 507 acked = false;
441 508
442 if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX | 509 if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) {
443 IEEE80211_TX_INTFL_MLME_CONN_TX)) { 510 struct ieee80211_sub_if_data *sdata;
444 struct ieee80211_sub_if_data *sdata = NULL;
445 struct ieee80211_sub_if_data *iter_sdata;
446 u64 cookie = (unsigned long)skb;
447 511
448 rcu_read_lock(); 512 rcu_read_lock();
449 513
450 if (skb->dev) { 514 sdata = ieee80211_sdata_from_skb(local, skb);
451 list_for_each_entry_rcu(iter_sdata, &local->interfaces,
452 list) {
453 if (!iter_sdata->dev)
454 continue;
455
456 if (skb->dev == iter_sdata->dev) {
457 sdata = iter_sdata;
458 break;
459 }
460 }
461 } else {
462 sdata = rcu_dereference(local->p2p_sdata);
463 }
464 515
465 if (!sdata) { 516 if (!sdata) {
466 skb->dev = NULL; 517 skb->dev = NULL;
@@ -478,38 +529,14 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
478 ieee80211_mgd_conn_tx_status(sdata, 529 ieee80211_mgd_conn_tx_status(sdata,
479 hdr->frame_control, 530 hdr->frame_control,
480 acked); 531 acked);
481 } else if (ieee80211_is_nullfunc(hdr->frame_control) ||
482 ieee80211_is_qos_nullfunc(hdr->frame_control)) {
483 cfg80211_probe_status(sdata->dev, hdr->addr1,
484 cookie, acked, GFP_ATOMIC);
485 } else { 532 } else {
486 cfg80211_mgmt_tx_status(&sdata->wdev, cookie, skb->data, 533 /* we assign ack frame ID for the others */
487 skb->len, acked, GFP_ATOMIC); 534 WARN_ON(1);
488 } 535 }
489 536
490 rcu_read_unlock(); 537 rcu_read_unlock();
491 } 538 } else if (info->ack_frame_id) {
492 539 ieee80211_report_ack_skb(local, info, acked, dropped);
493 if (unlikely(info->ack_frame_id)) {
494 struct sk_buff *ack_skb;
495 unsigned long flags;
496
497 spin_lock_irqsave(&local->ack_status_lock, flags);
498 ack_skb = idr_find(&local->ack_status_frames,
499 info->ack_frame_id);
500 if (ack_skb)
501 idr_remove(&local->ack_status_frames,
502 info->ack_frame_id);
503 spin_unlock_irqrestore(&local->ack_status_lock, flags);
504
505 if (ack_skb) {
506 if (!dropped) {
507 /* consumes ack_skb */
508 skb_complete_wifi_ack(ack_skb, acked);
509 } else {
510 dev_kfree_skb_any(ack_skb);
511 }
512 }
513 } 540 }
514} 541}
515 542
@@ -631,15 +658,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
631 } 658 }
632 659
633 if (acked || noack_success) { 660 if (acked || noack_success) {
634 local->dot11TransmittedFrameCount++; 661 I802_DEBUG_INC(local->dot11TransmittedFrameCount);
635 if (!pubsta) 662 if (!pubsta)
636 local->dot11MulticastTransmittedFrameCount++; 663 I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
637 if (retry_count > 0) 664 if (retry_count > 0)
638 local->dot11RetryCount++; 665 I802_DEBUG_INC(local->dot11RetryCount);
639 if (retry_count > 1) 666 if (retry_count > 1)
640 local->dot11MultipleRetryCount++; 667 I802_DEBUG_INC(local->dot11MultipleRetryCount);
641 } else { 668 } else {
642 local->dot11FailedCount++; 669 I802_DEBUG_INC(local->dot11FailedCount);
643 } 670 }
644} 671}
645EXPORT_SYMBOL(ieee80211_tx_status_noskb); 672EXPORT_SYMBOL(ieee80211_tx_status_noskb);
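
Wrapping the dot11* counters in I802_DEBUG_INC() makes them free unless debug counters are configured in; the macro is assumed to follow the usual pattern in net/mac80211/ieee80211_i.h:

/* Assumed definition: increment when CONFIG_MAC80211_DEBUG_COUNTERS
 * is set, compile to nothing otherwise. */
#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
#define I802_DEBUG_INC(c) ((c)++)
#else
#define I802_DEBUG_INC(c) do { } while (0)
#endif
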
@@ -703,7 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
703 ieee80211_get_qos_ctl(hdr), 730 ieee80211_get_qos_ctl(hdr),
704 sta, true, acked); 731 sta, true, acked);
705 732
706 if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && 733 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
707 (ieee80211_is_data(hdr->frame_control)) && 734 (ieee80211_is_data(hdr->frame_control)) &&
708 (rates_idx != -1)) 735 (rates_idx != -1))
709 sta->last_tx_rate = info->status.rates[rates_idx]; 736 sta->last_tx_rate = info->status.rates[rates_idx];
@@ -770,11 +797,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
770 ieee80211_frame_acked(sta, skb); 797 ieee80211_frame_acked(sta, skb);
771 798
772 if ((sta->sdata->vif.type == NL80211_IFTYPE_STATION) && 799 if ((sta->sdata->vif.type == NL80211_IFTYPE_STATION) &&
773 (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) 800 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
774 ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data, 801 ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
775 acked, info->status.tx_time); 802 acked, info->status.tx_time);
776 803
777 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { 804 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
778 if (info->flags & IEEE80211_TX_STAT_ACK) { 805 if (info->flags & IEEE80211_TX_STAT_ACK) {
779 if (sta->lost_packets) 806 if (sta->lost_packets)
780 sta->lost_packets = 0; 807 sta->lost_packets = 0;
@@ -802,13 +829,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
802 if ((info->flags & IEEE80211_TX_STAT_ACK) || 829 if ((info->flags & IEEE80211_TX_STAT_ACK) ||
803 (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { 830 (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) {
804 if (ieee80211_is_first_frag(hdr->seq_ctrl)) { 831 if (ieee80211_is_first_frag(hdr->seq_ctrl)) {
805 local->dot11TransmittedFrameCount++; 832 I802_DEBUG_INC(local->dot11TransmittedFrameCount);
806 if (is_multicast_ether_addr(ieee80211_get_DA(hdr))) 833 if (is_multicast_ether_addr(ieee80211_get_DA(hdr)))
807 local->dot11MulticastTransmittedFrameCount++; 834 I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
808 if (retry_count > 0) 835 if (retry_count > 0)
809 local->dot11RetryCount++; 836 I802_DEBUG_INC(local->dot11RetryCount);
810 if (retry_count > 1) 837 if (retry_count > 1)
811 local->dot11MultipleRetryCount++; 838 I802_DEBUG_INC(local->dot11MultipleRetryCount);
812 } 839 }
813 840
814 /* This counter shall be incremented for an acknowledged MPDU 841 /* This counter shall be incremented for an acknowledged MPDU
@@ -818,14 +845,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
818 if (!is_multicast_ether_addr(hdr->addr1) || 845 if (!is_multicast_ether_addr(hdr->addr1) ||
819 ieee80211_is_data(fc) || 846 ieee80211_is_data(fc) ||
820 ieee80211_is_mgmt(fc)) 847 ieee80211_is_mgmt(fc))
821 local->dot11TransmittedFragmentCount++; 848 I802_DEBUG_INC(local->dot11TransmittedFragmentCount);
822 } else { 849 } else {
823 if (ieee80211_is_first_frag(hdr->seq_ctrl)) 850 if (ieee80211_is_first_frag(hdr->seq_ctrl))
824 local->dot11FailedCount++; 851 I802_DEBUG_INC(local->dot11FailedCount);
825 } 852 }
826 853
827 if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && 854 if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) &&
828 (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && 855 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
829 !(info->flags & IEEE80211_TX_CTL_INJECTED) && 856 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
830 local->ps_sdata && !(local->scanning)) { 857 local->ps_sdata && !(local->scanning)) {
831 if (info->flags & IEEE80211_TX_STAT_ACK) { 858 if (info->flags & IEEE80211_TX_STAT_ACK) {
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index fff0d864adfa..ad31b2dab4f5 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -167,23 +167,16 @@ static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb)
167static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, 167static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata,
168 u16 status_code) 168 u16 status_code)
169{ 169{
170 struct ieee80211_local *local = sdata->local;
171 u16 capab;
172
173 /* The capability will be 0 when sending a failure code */ 170 /* The capability will be 0 when sending a failure code */
174 if (status_code != 0) 171 if (status_code != 0)
175 return 0; 172 return 0;
176 173
177 capab = 0; 174 if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ) {
178 if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) 175 return WLAN_CAPABILITY_SHORT_SLOT_TIME |
179 return capab; 176 WLAN_CAPABILITY_SHORT_PREAMBLE;
180 177 }
181 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
182 capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
183 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
184 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
185 178
186 return capab; 179 return 0;
187} 180}
188 181
189static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, 182static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
@@ -527,30 +520,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
527 520
528 /* if HT support is only added in TDLS, we need an HT-operation IE */ 521 /* if HT support is only added in TDLS, we need an HT-operation IE */
529 if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { 522 if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
530 struct ieee80211_chanctx_conf *chanctx_conf = 523 pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
531 rcu_dereference(sdata->vif.chanctx_conf); 524 /* send an empty HT operation IE */
532 if (!WARN_ON(!chanctx_conf)) { 525 ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
533 pos = skb_put(skb, 2 + 526 &sdata->vif.bss_conf.chandef, 0);
534 sizeof(struct ieee80211_ht_operation));
535 /* send an empty HT operation IE */
536 ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
537 &chanctx_conf->def, 0);
538 }
539 } 527 }
540 528
541 ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); 529 ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
542 530
543 /* only include VHT-operation if not on the 2.4GHz band */ 531 /* only include VHT-operation if not on the 2.4GHz band */
544 if (band != IEEE80211_BAND_2GHZ && !ap_sta->sta.vht_cap.vht_supported && 532 if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
545 sta->sta.vht_cap.vht_supported) { 533 pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
546 struct ieee80211_chanctx_conf *chanctx_conf = 534 ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
547 rcu_dereference(sdata->vif.chanctx_conf); 535 &sdata->vif.bss_conf.chandef);
548 if (!WARN_ON(!chanctx_conf)) {
549 pos = skb_put(skb, 2 +
550 sizeof(struct ieee80211_vht_operation));
551 ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
552 &chanctx_conf->def);
553 }
554 } 536 }
555 537
556 rcu_read_unlock(); 538 rcu_read_unlock();
@@ -953,7 +935,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
953 * packet through the AP. 935 * packet through the AP.
954 */ 936 */
955 if ((action_code == WLAN_TDLS_TEARDOWN) && 937 if ((action_code == WLAN_TDLS_TEARDOWN) &&
956 (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { 938 ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
957 bool try_resend; /* Should we keep skb for possible resend */ 939 bool try_resend; /* Should we keep skb for possible resend */
958 940
959 /* If not sending directly to peer - no point in keeping skb */ 941 /* If not sending directly to peer - no point in keeping skb */
@@ -1194,6 +1176,12 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
1194 1176
1195 switch (oper) { 1177 switch (oper) {
1196 case NL80211_TDLS_ENABLE_LINK: 1178 case NL80211_TDLS_ENABLE_LINK:
1179 if (sdata->vif.csa_active) {
1180 tdls_dbg(sdata, "TDLS: disallow link during CSA\n");
1181 ret = -EBUSY;
1182 break;
1183 }
1184
1197 rcu_read_lock(); 1185 rcu_read_lock();
1198 sta = sta_info_get(sdata, peer); 1186 sta = sta_info_get(sdata, peer);
1199 if (!sta) { 1187 if (!sta) {
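
Two behavioural changes in tdls.c are worth spelling out: the SHORT_SLOT/SHORT_PREAMBLE "incapable" hardware flags are gone, so the 2.4 GHz capability bits are now advertised unconditionally, and NL80211_TDLS_ENABLE_LINK is refused with -EBUSY while a channel switch is active. Reassembled from the hunk above, the capability helper now reduces to:

static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata,
					u16 status_code)
{
	/* The capability will be 0 when sending a failure code */
	if (status_code != 0)
		return 0;

	/* on 2.4 GHz these are now always advertised */
	if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ)
		return WLAN_CAPABILITY_SHORT_SLOT_TIME |
		       WLAN_CAPABILITY_SHORT_PREAMBLE;

	return 0;
}
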
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 4c2e7690226a..6f14591d8ca9 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -69,6 +69,17 @@
69#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ 69#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \
70 __entry->rx_chains_static, __entry->rx_chains_dynamic 70 __entry->rx_chains_static, __entry->rx_chains_dynamic
71 71
72#define KEY_ENTRY __field(u32, cipher) \
73 __field(u8, hw_key_idx) \
74 __field(u8, flags) \
75 __field(s8, keyidx)
76#define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \
77 __entry->flags = (k)->flags; \
78 __entry->keyidx = (k)->keyidx; \
79 __entry->hw_key_idx = (k)->hw_key_idx;
80#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d"
81#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx
82
72 83
73 84
74/* 85/*
@@ -522,25 +533,19 @@ TRACE_EVENT(drv_set_key,
522 LOCAL_ENTRY 533 LOCAL_ENTRY
523 VIF_ENTRY 534 VIF_ENTRY
524 STA_ENTRY 535 STA_ENTRY
525 __field(u32, cipher) 536 KEY_ENTRY
526 __field(u8, hw_key_idx)
527 __field(u8, flags)
528 __field(s8, keyidx)
529 ), 537 ),
530 538
531 TP_fast_assign( 539 TP_fast_assign(
532 LOCAL_ASSIGN; 540 LOCAL_ASSIGN;
533 VIF_ASSIGN; 541 VIF_ASSIGN;
534 STA_ASSIGN; 542 STA_ASSIGN;
535 __entry->cipher = key->cipher; 543 KEY_ASSIGN(key);
536 __entry->flags = key->flags;
537 __entry->keyidx = key->keyidx;
538 __entry->hw_key_idx = key->hw_key_idx;
539 ), 544 ),
540 545
541 TP_printk( 546 TP_printk(
542 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, 547 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT,
543 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG 548 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG
544 ) 549 )
545); 550);
546 551
@@ -656,28 +661,25 @@ TRACE_EVENT(drv_get_stats,
656 ) 661 )
657); 662);
658 663
659TRACE_EVENT(drv_get_tkip_seq, 664TRACE_EVENT(drv_get_key_seq,
660 TP_PROTO(struct ieee80211_local *local, 665 TP_PROTO(struct ieee80211_local *local,
661 u8 hw_key_idx, u32 *iv32, u16 *iv16), 666 struct ieee80211_key_conf *key),
662 667
663 TP_ARGS(local, hw_key_idx, iv32, iv16), 668 TP_ARGS(local, key),
664 669
665 TP_STRUCT__entry( 670 TP_STRUCT__entry(
666 LOCAL_ENTRY 671 LOCAL_ENTRY
667 __field(u8, hw_key_idx) 672 KEY_ENTRY
668 __field(u32, iv32)
669 __field(u16, iv16)
670 ), 673 ),
671 674
672 TP_fast_assign( 675 TP_fast_assign(
673 LOCAL_ASSIGN; 676 LOCAL_ASSIGN;
674 __entry->hw_key_idx = hw_key_idx; 677 KEY_ASSIGN(key);
675 __entry->iv32 = *iv32;
676 __entry->iv16 = *iv16;
677 ), 678 ),
678 679
679 TP_printk( 680 TP_printk(
680 LOCAL_PR_FMT, LOCAL_PR_ARG 681 LOCAL_PR_FMT KEY_PR_FMT,
682 LOCAL_PR_ARG, KEY_PR_ARG
681 ) 683 )
682); 684);
683 685
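
The new KEY_ENTRY/KEY_ASSIGN/KEY_PR_FMT/KEY_PR_ARG macros let any key-related tracepoint record the same four fields (cipher, flags, keyidx, hw_key_idx) without repeating the boilerplate, as the rewritten drv_set_key and drv_get_key_seq events show. A hypothetical further event composed from the same pieces (event name invented for illustration):

TRACE_EVENT(drv_example_key_event,
	TP_PROTO(struct ieee80211_local *local,
		 struct ieee80211_key_conf *key),

	TP_ARGS(local, key),

	TP_STRUCT__entry(
		LOCAL_ENTRY
		KEY_ENTRY
	),

	TP_fast_assign(
		LOCAL_ASSIGN;
		KEY_ASSIGN(key);
	),

	TP_printk(
		LOCAL_PR_FMT KEY_PR_FMT,
		LOCAL_PR_ARG, KEY_PR_ARG
	)
);
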
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 667111ee6a20..8410bb3bf5e8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -37,6 +37,16 @@
37 37
38/* misc utils */ 38/* misc utils */
39 39
40static inline void ieee80211_tx_stats(struct net_device *dev, u32 len)
41{
42 struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
43
44 u64_stats_update_begin(&tstats->syncp);
45 tstats->tx_packets++;
46 tstats->tx_bytes += len;
47 u64_stats_update_end(&tstats->syncp);
48}
49
40static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, 50static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
41 struct sk_buff *skb, int group_addr, 51 struct sk_buff *skb, int group_addr,
42 int next_frag_len) 52 int next_frag_len)
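
ieee80211_tx_stats() is the writer half of the standard pcpu_sw_netstats idiom: per-CPU counters guarded by a u64_stats seqcount instead of a shared lock. For context, the reader side that a stats handler would use folds the per-CPU values roughly like this (sketch; function name illustrative):

/* Sum per-CPU TX counters, retrying a CPU's snapshot if a writer
 * updated it concurrently. */
static void fold_tx_tstats(struct net_device *dev,
			   struct rtnl_link_stats64 *stats)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_sw_netstats *t =
			per_cpu_ptr(dev->tstats, cpu);
		u64 packets, bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&t->syncp);
			packets = t->tx_packets;
			bytes = t->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&t->syncp, start));

		stats->tx_packets += packets;
		stats->tx_bytes += bytes;
	}
}
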
@@ -201,11 +211,11 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
201 struct ieee80211_if_managed *ifmgd; 211 struct ieee80211_if_managed *ifmgd;
202 212
203 /* driver doesn't support power save */ 213 /* driver doesn't support power save */
204 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) 214 if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS))
205 return TX_CONTINUE; 215 return TX_CONTINUE;
206 216
207 /* hardware does dynamic power save */ 217 /* hardware does dynamic power save */
208 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) 218 if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
209 return TX_CONTINUE; 219 return TX_CONTINUE;
210 220
211 /* dynamic power save disabled */ 221 /* dynamic power save disabled */
@@ -421,7 +431,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
421 if (ieee80211_is_probe_req(hdr->frame_control)) 431 if (ieee80211_is_probe_req(hdr->frame_control))
422 return TX_CONTINUE; 432 return TX_CONTINUE;
423 433
424 if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) 434 if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL))
425 info->hw_queue = tx->sdata->vif.cab_queue; 435 info->hw_queue = tx->sdata->vif.cab_queue;
426 436
427 /* no stations in PS mode */ 437 /* no stations in PS mode */
@@ -431,7 +441,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
431 info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; 441 info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
432 442
433 /* device releases frame after DTIM beacon */ 443 /* device releases frame after DTIM beacon */
434 if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) 444 if (!ieee80211_hw_check(&tx->local->hw, HOST_BROADCAST_PS_BUFFERING))
435 return TX_CONTINUE; 445 return TX_CONTINUE;
436 446
437 /* buffered in mac80211 */ 447 /* buffered in mac80211 */
@@ -987,7 +997,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
987 997
988 skb_queue_walk(&tx->skbs, skb) { 998 skb_queue_walk(&tx->skbs, skb) {
989 ac = skb_get_queue_mapping(skb); 999 ac = skb_get_queue_mapping(skb);
990 tx->sta->tx_fragments++;
991 tx->sta->tx_bytes[ac] += skb->len; 1000 tx->sta->tx_bytes[ac] += skb->len;
992 } 1001 }
993 if (ac >= 0) 1002 if (ac >= 0)
@@ -1176,8 +1185,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1176 1185
1177 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && 1186 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
1178 !ieee80211_is_qos_nullfunc(hdr->frame_control) && 1187 !ieee80211_is_qos_nullfunc(hdr->frame_control) &&
1179 (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) && 1188 ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) &&
1180 !(local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) { 1189 !ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
1181 struct tid_ampdu_tx *tid_tx; 1190 struct tid_ampdu_tx *tid_tx;
1182 1191
1183 qc = ieee80211_get_qos_ctl(hdr); 1192 qc = ieee80211_get_qos_ctl(hdr);
@@ -1420,7 +1429,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
1420 vif = &sdata->vif; 1429 vif = &sdata->vif;
1421 info->hw_queue = 1430 info->hw_queue =
1422 vif->hw_queue[skb_get_queue_mapping(skb)]; 1431 vif->hw_queue[skb_get_queue_mapping(skb)];
1423 } else if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { 1432 } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
1424 dev_kfree_skb(skb); 1433 dev_kfree_skb(skb);
1425 return true; 1434 return true;
1426 } else 1435 } else
@@ -1466,7 +1475,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1466 CALL_TXH(ieee80211_tx_h_ps_buf); 1475 CALL_TXH(ieee80211_tx_h_ps_buf);
1467 CALL_TXH(ieee80211_tx_h_check_control_port_protocol); 1476 CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
1468 CALL_TXH(ieee80211_tx_h_select_key); 1477 CALL_TXH(ieee80211_tx_h_select_key);
1469 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1478 if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
1470 CALL_TXH(ieee80211_tx_h_rate_ctrl); 1479 CALL_TXH(ieee80211_tx_h_rate_ctrl);
1471 1480
1472 if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { 1481 if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
@@ -1481,7 +1490,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1481 /* handlers after fragment must be aware of tx info fragmentation! */ 1490 /* handlers after fragment must be aware of tx info fragmentation! */
1482 CALL_TXH(ieee80211_tx_h_stats); 1491 CALL_TXH(ieee80211_tx_h_stats);
1483 CALL_TXH(ieee80211_tx_h_encrypt); 1492 CALL_TXH(ieee80211_tx_h_encrypt);
1484 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1493 if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
1485 CALL_TXH(ieee80211_tx_h_calculate_duration); 1494 CALL_TXH(ieee80211_tx_h_calculate_duration);
1486#undef CALL_TXH 1495#undef CALL_TXH
1487 1496
@@ -1571,7 +1580,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
1571 1580
1572 /* set up hw_queue value early */ 1581 /* set up hw_queue value early */
1573 if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) || 1582 if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) ||
1574 !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) 1583 !ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
1575 info->hw_queue = 1584 info->hw_queue =
1576 sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; 1585 sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
1577 1586
@@ -1598,9 +1607,9 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
1598 } 1607 }
1599 1608
1600 if (skb_cloned(skb) && 1609 if (skb_cloned(skb) &&
1601 (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) || 1610 (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) ||
1602 !skb_clone_writable(skb, ETH_HLEN) || 1611 !skb_clone_writable(skb, ETH_HLEN) ||
1603 sdata->crypto_tx_tailroom_needed_cnt)) 1612 (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
1604 I802_DEBUG_INC(local->tx_expand_skb_head_cloned); 1613 I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
1605 else if (head_need || tail_need) 1614 else if (head_need || tail_need)
1606 I802_DEBUG_INC(local->tx_expand_skb_head); 1615 I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -2387,12 +2396,455 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
2387 return ERR_PTR(ret); 2396 return ERR_PTR(ret);
2388} 2397}
2389 2398
2399/*
2400 * fast-xmit overview
2401 *
2402 * The core idea of this fast-xmit is to remove per-packet checks by checking
2403 * them out of band. ieee80211_check_fast_xmit() implements the out-of-band
2404 * checks that are needed to get the sta->fast_tx pointer assigned, after which
2405 * much less work can be done per packet. For example, fragmentation must be
2406 * disabled or the fast_tx pointer will not be set. All the conditions are seen
2407 * in the code here.
2408 *
2409 * Once assigned, the fast_tx data structure also caches the per-packet 802.11
2410 * header and other data to aid packet processing in ieee80211_xmit_fast().
2411 *
2412 * The most difficult part of this is that when any of these assumptions
2413 * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(),
2414 * ieee80211_check_fast_xmit() or friends) is required to reset the data,
2415 * since the per-packet code no longer checks the conditions. This is reflected
2416 * by the calls to these functions throughout the rest of the code, and must be
2417 * maintained if any of the TX path checks change.
2418 */
2419
2420void ieee80211_check_fast_xmit(struct sta_info *sta)
2421{
2422 struct ieee80211_fast_tx build = {}, *fast_tx = NULL, *old;
2423 struct ieee80211_local *local = sta->local;
2424 struct ieee80211_sub_if_data *sdata = sta->sdata;
2425 struct ieee80211_hdr *hdr = (void *)build.hdr;
2426 struct ieee80211_chanctx_conf *chanctx_conf;
2427 __le16 fc;
2428
2429 if (!ieee80211_hw_check(&local->hw, SUPPORT_FAST_XMIT))
2430 return;
2431
 2432 /* Locking here protects the pointer itself, and also guards against
 2433 * concurrent invocations winning data-access races to, e.g., the key
 2434 * pointer that is used.
2435 * Without it, the invocation of this function right after the key
2436 * pointer changes wouldn't be sufficient, as another CPU could access
2437 * the pointer, then stall, and then do the cache update after the CPU
2438 * that invalidated the key.
2439 * With the locking, such scenarios cannot happen as the check for the
2440 * key and the fast-tx assignment are done atomically, so the CPU that
 2441 * modifies the key will either wait or the other will see the key
2442 * cleared/changed already.
2443 */
2444 spin_lock_bh(&sta->lock);
2445 if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
2446 !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS) &&
2447 sdata->vif.type == NL80211_IFTYPE_STATION)
2448 goto out;
2449
2450 if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
2451 goto out;
2452
2453 if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
2454 test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
2455 test_sta_flag(sta, WLAN_STA_PS_DELIVER))
2456 goto out;
2457
2458 if (sdata->noack_map)
2459 goto out;
2460
2461 /* fast-xmit doesn't handle fragmentation at all */
2462 if (local->hw.wiphy->frag_threshold != (u32)-1 &&
2463 !local->ops->set_frag_threshold)
2464 goto out;
2465
2466 rcu_read_lock();
2467 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
2468 if (!chanctx_conf) {
2469 rcu_read_unlock();
2470 goto out;
2471 }
2472 build.band = chanctx_conf->def.chan->band;
2473 rcu_read_unlock();
2474
2475 fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
2476
2477 switch (sdata->vif.type) {
2478 case NL80211_IFTYPE_ADHOC:
2479 /* DA SA BSSID */
2480 build.da_offs = offsetof(struct ieee80211_hdr, addr1);
2481 build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
2482 memcpy(hdr->addr3, sdata->u.ibss.bssid, ETH_ALEN);
2483 build.hdr_len = 24;
2484 break;
2485 case NL80211_IFTYPE_STATION:
2486 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
2487 /* DA SA BSSID */
2488 build.da_offs = offsetof(struct ieee80211_hdr, addr1);
2489 build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
2490 memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN);
2491 build.hdr_len = 24;
2492 break;
2493 }
2494
2495 if (sdata->u.mgd.use_4addr) {
2496 /* non-regular ethertype cannot use the fastpath */
2497 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
2498 IEEE80211_FCTL_TODS);
2499 /* RA TA DA SA */
2500 memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
2501 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
2502 build.da_offs = offsetof(struct ieee80211_hdr, addr3);
2503 build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
2504 build.hdr_len = 30;
2505 break;
2506 }
2507 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
2508 /* BSSID SA DA */
2509 memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
2510 build.da_offs = offsetof(struct ieee80211_hdr, addr3);
2511 build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
2512 build.hdr_len = 24;
2513 break;
2514 case NL80211_IFTYPE_AP_VLAN:
2515 if (sdata->wdev.use_4addr) {
2516 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
2517 IEEE80211_FCTL_TODS);
2518 /* RA TA DA SA */
2519 memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
2520 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
2521 build.da_offs = offsetof(struct ieee80211_hdr, addr3);
2522 build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
2523 build.hdr_len = 30;
2524 break;
2525 }
2526 /* fall through */
2527 case NL80211_IFTYPE_AP:
2528 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
2529 /* DA BSSID SA */
2530 build.da_offs = offsetof(struct ieee80211_hdr, addr1);
2531 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
2532 build.sa_offs = offsetof(struct ieee80211_hdr, addr3);
2533 build.hdr_len = 24;
2534 break;
2535 default:
2536 /* not handled on fast-xmit */
2537 goto out;
2538 }
2539
2540 if (sta->sta.wme) {
2541 build.hdr_len += 2;
2542 fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
2543 }
2544
2545 /* We store the key here so there's no point in using rcu_dereference()
2546 * but that's fine because the code that changes the pointers will call
2547 * this function after doing so. For a single CPU that would be enough,
2548 * for multiple see the comment above.
2549 */
2550 build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
2551 if (!build.key)
2552 build.key = rcu_access_pointer(sdata->default_unicast_key);
2553 if (build.key) {
2554 bool gen_iv, iv_spc, mmic;
2555
2556 gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
2557 iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
2558 mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC;
2559
2560 /* don't handle software crypto */
2561 if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
2562 goto out;
2563
2564 switch (build.key->conf.cipher) {
2565 case WLAN_CIPHER_SUITE_CCMP:
2566 case WLAN_CIPHER_SUITE_CCMP_256:
2567 /* add fixed key ID */
2568 if (gen_iv) {
2569 (build.hdr + build.hdr_len)[3] =
2570 0x20 | (build.key->conf.keyidx << 6);
2571 build.pn_offs = build.hdr_len;
2572 }
2573 if (gen_iv || iv_spc)
2574 build.hdr_len += IEEE80211_CCMP_HDR_LEN;
2575 break;
2576 case WLAN_CIPHER_SUITE_GCMP:
2577 case WLAN_CIPHER_SUITE_GCMP_256:
2578 /* add fixed key ID */
2579 if (gen_iv) {
2580 (build.hdr + build.hdr_len)[3] =
2581 0x20 | (build.key->conf.keyidx << 6);
2582 build.pn_offs = build.hdr_len;
2583 }
2584 if (gen_iv || iv_spc)
2585 build.hdr_len += IEEE80211_GCMP_HDR_LEN;
2586 break;
2587 case WLAN_CIPHER_SUITE_TKIP:
2588 /* cannot handle MMIC or IV generation in xmit-fast */
2589 if (mmic || gen_iv)
2590 goto out;
2591 if (iv_spc)
2592 build.hdr_len += IEEE80211_TKIP_IV_LEN;
2593 break;
2594 case WLAN_CIPHER_SUITE_WEP40:
2595 case WLAN_CIPHER_SUITE_WEP104:
2596 /* cannot handle IV generation in fast-xmit */
2597 if (gen_iv)
2598 goto out;
2599 if (iv_spc)
2600 build.hdr_len += IEEE80211_WEP_IV_LEN;
2601 break;
2602 case WLAN_CIPHER_SUITE_AES_CMAC:
2603 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
2604 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
2605 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
2606 WARN(1,
2607 "management cipher suite 0x%x enabled for data\n",
2608 build.key->conf.cipher);
2609 goto out;
2610 default:
2611 /* we don't know how to generate IVs for this at all */
2612 if (WARN_ON(gen_iv))
2613 goto out;
2614 /* pure hardware keys are OK, of course */
2615 if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME))
2616 break;
2617 /* cipher scheme might require space allocation */
2618 if (iv_spc &&
2619 build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV)
2620 goto out;
2621 if (iv_spc)
2622 build.hdr_len += build.key->conf.iv_len;
2623 }
2624
2625 fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
2626 }
2627
2628 hdr->frame_control = fc;
2629
2630 memcpy(build.hdr + build.hdr_len,
2631 rfc1042_header, sizeof(rfc1042_header));
2632 build.hdr_len += sizeof(rfc1042_header);
2633
2634 fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC);
2635 /* if the kmemdup fails, continue w/o fast_tx */
2636 if (!fast_tx)
2637 goto out;
2638
2639 out:
2640 /* we might have raced against another call to this function */
2641 old = rcu_dereference_protected(sta->fast_tx,
2642 lockdep_is_held(&sta->lock));
2643 rcu_assign_pointer(sta->fast_tx, fast_tx);
2644 if (old)
2645 kfree_rcu(old, rcu_head);
2646 spin_unlock_bh(&sta->lock);
2647}
2648
2649void ieee80211_check_fast_xmit_all(struct ieee80211_local *local)
2650{
2651 struct sta_info *sta;
2652
2653 rcu_read_lock();
2654 list_for_each_entry_rcu(sta, &local->sta_list, list)
2655 ieee80211_check_fast_xmit(sta);
2656 rcu_read_unlock();
2657}
2658
2659void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata)
2660{
2661 struct ieee80211_local *local = sdata->local;
2662 struct sta_info *sta;
2663
2664 rcu_read_lock();
2665
2666 list_for_each_entry_rcu(sta, &local->sta_list, list) {
2667 if (sdata != sta->sdata &&
2668 (!sta->sdata->bss || sta->sdata->bss != sdata->bss))
2669 continue;
2670 ieee80211_check_fast_xmit(sta);
2671 }
2672
2673 rcu_read_unlock();
2674}
2675
2676void ieee80211_clear_fast_xmit(struct sta_info *sta)
2677{
2678 struct ieee80211_fast_tx *fast_tx;
2679
2680 spin_lock_bh(&sta->lock);
2681 fast_tx = rcu_dereference_protected(sta->fast_tx,
2682 lockdep_is_held(&sta->lock));
2683 RCU_INIT_POINTER(sta->fast_tx, NULL);
2684 spin_unlock_bh(&sta->lock);
2685
2686 if (fast_tx)
2687 kfree_rcu(fast_tx, rcu_head);
2688}
2689
2690static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
2691 struct net_device *dev, struct sta_info *sta,
2692 struct ieee80211_fast_tx *fast_tx,
2693 struct sk_buff *skb)
2694{
2695 struct ieee80211_local *local = sdata->local;
2696 u16 ethertype = (skb->data[12] << 8) | skb->data[13];
2697 int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
2698 int hw_headroom = sdata->local->hw.extra_tx_headroom;
2699 struct ethhdr eth;
2700 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
2701 struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
2702 struct ieee80211_tx_data tx;
2703 ieee80211_tx_result r;
2704 struct tid_ampdu_tx *tid_tx = NULL;
2705 u8 tid = IEEE80211_NUM_TIDS;
2706
2707 /* control port protocol needs a lot of special handling */
2708 if (cpu_to_be16(ethertype) == sdata->control_port_protocol)
2709 return false;
2710
2711 /* only RFC 1042 SNAP */
2712 if (ethertype < ETH_P_802_3_MIN)
2713 return false;
2714
2715 /* don't handle TX status request here either */
2716 if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
2717 return false;
2718
2719 if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
2720 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
2721 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
2722 if (tid_tx) {
2723 if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
2724 return false;
2725 if (tid_tx->timeout)
2726 tid_tx->last_tx = jiffies;
2727 }
2728 }
2729
2730 /* after this point (skb is modified) we cannot return false */
2731
2732 if (skb_shared(skb)) {
2733 struct sk_buff *tmp_skb = skb;
2734
2735 skb = skb_clone(skb, GFP_ATOMIC);
2736 kfree_skb(tmp_skb);
2737
2738 if (!skb)
2739 return true;
2740 }
2741
2742 ieee80211_tx_stats(dev, skb->len + extra_head);
2743
2744 /* will not be crypto-handled beyond what we do here, so use false
2745 * as the may-encrypt argument for the resize to not account for
2746 * more room than we already have in 'extra_head'
2747 */
2748 if (unlikely(ieee80211_skb_resize(sdata, skb,
2749 max_t(int, extra_head + hw_headroom -
2750 skb_headroom(skb), 0),
2751 false))) {
2752 kfree_skb(skb);
2753 return true;
2754 }
2755
2756 memcpy(&eth, skb->data, ETH_HLEN - 2);
2757 hdr = (void *)skb_push(skb, extra_head);
2758 memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len);
2759 memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
2760 memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
2761
2762 memset(info, 0, sizeof(*info));
2763 info->band = fast_tx->band;
2764 info->control.vif = &sdata->vif;
2765 info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
2766 IEEE80211_TX_CTL_DONTFRAG |
2767 (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
2768
2769 if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
2770 *ieee80211_get_qos_ctl(hdr) = tid;
2771 hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
2772 } else {
2773 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
2774 hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
2775 sdata->sequence_number += 0x10;
2776 }
2777
2778 sta->tx_msdu[tid]++;
2779
2780 info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
2781
2782 __skb_queue_head_init(&tx.skbs);
2783
2784 tx.flags = IEEE80211_TX_UNICAST;
2785 tx.local = local;
2786 tx.sdata = sdata;
2787 tx.sta = sta;
2788 tx.key = fast_tx->key;
2789
2790 if (fast_tx->key)
2791 info->control.hw_key = &fast_tx->key->conf;
2792
2793 if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
2794 tx.skb = skb;
2795 r = ieee80211_tx_h_rate_ctrl(&tx);
2796 skb = tx.skb;
2797 tx.skb = NULL;
2798
2799 if (r != TX_CONTINUE) {
2800 if (r != TX_QUEUED)
2801 kfree_skb(skb);
2802 return true;
2803 }
2804 }
2805
2806 /* statistics normally done by ieee80211_tx_h_stats (but that
2807 * has to consider fragmentation, so is more complex)
2808 */
2809 sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len;
2810 sta->tx_packets[skb_get_queue_mapping(skb)]++;
2811
2812 if (fast_tx->pn_offs) {
2813 u64 pn;
2814 u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
2815
2816 switch (fast_tx->key->conf.cipher) {
2817 case WLAN_CIPHER_SUITE_CCMP:
2818 case WLAN_CIPHER_SUITE_CCMP_256:
2819 case WLAN_CIPHER_SUITE_GCMP:
2820 case WLAN_CIPHER_SUITE_GCMP_256:
2821 pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
2822 crypto_hdr[0] = pn;
2823 crypto_hdr[1] = pn >> 8;
2824 crypto_hdr[4] = pn >> 16;
2825 crypto_hdr[5] = pn >> 24;
2826 crypto_hdr[6] = pn >> 32;
2827 crypto_hdr[7] = pn >> 40;
2828 break;
2829 }
2830 }
2831
2832 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
2833 sdata = container_of(sdata->bss,
2834 struct ieee80211_sub_if_data, u.ap);
2835
2836 __skb_queue_tail(&tx.skbs, skb);
2837 ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
2838 return true;
2839}
2840
2390void __ieee80211_subif_start_xmit(struct sk_buff *skb, 2841void __ieee80211_subif_start_xmit(struct sk_buff *skb,
2391 struct net_device *dev, 2842 struct net_device *dev,
2392 u32 info_flags) 2843 u32 info_flags)
2393{ 2844{
2394 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 2845 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2395 struct sta_info *sta; 2846 struct sta_info *sta;
2847 struct sk_buff *next;
2396 2848
2397 if (unlikely(skb->len < ETH_HLEN)) { 2849 if (unlikely(skb->len < ETH_HLEN)) {
2398 kfree_skb(skb); 2850 kfree_skb(skb);
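
The price of fast-xmit, as the overview comment stresses, is that every state change the out-of-band checks depend on must explicitly re-run them; the per-packet path trusts the cached data blindly. A hedged sketch of the expected interaction at one such trigger (the function is hypothetical; the real triggers are spread across key, powersave and station management code):

static void sta_ps_transition_sketch(struct sta_info *sta, bool sleeping)
{
	if (sleeping)
		/* PS stations are ineligible: drop the cached header now */
		ieee80211_clear_fast_xmit(sta);
	else
		/* waking up: re-run the out-of-band checks, which
		 * republish sta->fast_tx only if all conditions hold */
		ieee80211_check_fast_xmit(sta);
}
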
@@ -2401,20 +2853,67 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
2401 2853
2402 rcu_read_lock(); 2854 rcu_read_lock();
2403 2855
2404 if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { 2856 if (ieee80211_lookup_ra_sta(sdata, skb, &sta))
2405 kfree_skb(skb); 2857 goto out_free;
2406 goto out; 2858
2859 if (!IS_ERR_OR_NULL(sta)) {
2860 struct ieee80211_fast_tx *fast_tx;
2861
2862 fast_tx = rcu_dereference(sta->fast_tx);
2863
2864 if (fast_tx &&
2865 ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
2866 goto out;
2407 } 2867 }
2408 2868
2409 skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); 2869 if (skb_is_gso(skb)) {
2410 if (IS_ERR(skb)) 2870 struct sk_buff *segs;
2411 goto out; 2871
2872 segs = skb_gso_segment(skb, 0);
2873 if (IS_ERR(segs)) {
2874 goto out_free;
2875 } else if (segs) {
2876 consume_skb(skb);
2877 skb = segs;
2878 }
2879 } else {
2880 /* we cannot process non-linear frames on this path */
2881 if (skb_linearize(skb)) {
2882 kfree_skb(skb);
2883 goto out;
2884 }
2885
 2886 /* the frame could be fragmented, software-encrypted, or otherwise
 2887 * transformed, so we cannot really handle checksum offload with it -
 2888 * fix it up in software before we handle anything else.
2889 */
2890 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2891 skb_set_transport_header(skb,
2892 skb_checksum_start_offset(skb));
2893 if (skb_checksum_help(skb))
2894 goto out_free;
2895 }
2896 }
2897
2898 next = skb;
2899 while (next) {
2900 skb = next;
2901 next = skb->next;
2412 2902
2413 dev->stats.tx_packets++; 2903 skb->prev = NULL;
2414 dev->stats.tx_bytes += skb->len; 2904 skb->next = NULL;
2415 dev->trans_start = jiffies; 2905
2906 skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
2907 if (IS_ERR(skb))
2908 goto out;
2416 2909
2417 ieee80211_xmit(sdata, sta, skb); 2910 ieee80211_tx_stats(dev, skb->len);
2911
2912 ieee80211_xmit(sdata, sta, skb);
2913 }
2914 goto out;
2915 out_free:
2916 kfree_skb(skb);
2418 out: 2917 out:
2419 rcu_read_unlock(); 2918 rcu_read_unlock();
2420} 2919}
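
The GSO branch leans on skb_gso_segment()'s three-way return convention, which the error handling above distinguishes; isolated into a sketch:

/* Sketch of the three outcomes: ERR_PTR on failure, NULL when no
 * segmentation was needed, otherwise a list replacing the original. */
static struct sk_buff *segment_for_tx(struct sk_buff *skb)
{
	struct sk_buff *segs;

	segs = skb_gso_segment(skb, 0);	/* features = 0: software-segment */
	if (IS_ERR(segs)) {
		kfree_skb(skb);		/* hard failure: drop the original */
		return NULL;
	}
	if (segs) {
		consume_skb(skb);	/* original replaced by the list */
		return segs;		/* walk the segments via skb->next */
	}
	return skb;			/* already small enough */
}
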
@@ -3308,7 +3807,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid)
3308 synchronize_net(); 3807 synchronize_net();
3309 3808
3310 /* Tear down BA sessions so we stop aggregating on this TID */ 3809 /* Tear down BA sessions so we stop aggregating on this TID */
3311 if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) { 3810 if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) {
3312 set_sta_flag(sta, WLAN_STA_BLOCK_BA); 3811 set_sta_flag(sta, WLAN_STA_BLOCK_BA);
3313 __ieee80211_stop_tx_ba_session(sta, tid, 3812 __ieee80211_stop_tx_ba_session(sta, tid,
3314 AGG_STOP_LOCAL_REQUEST); 3813 AGG_STOP_LOCAL_REQUEST);
@@ -3322,7 +3821,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid)
3322 ieee80211_wake_vif_queues(local, sdata, 3821 ieee80211_wake_vif_queues(local, sdata,
3323 IEEE80211_QUEUE_STOP_REASON_RESERVE_TID); 3822 IEEE80211_QUEUE_STOP_REASON_RESERVE_TID);
3324 3823
3325 if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) 3824 if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION))
3326 clear_sta_flag(sta, WLAN_STA_BLOCK_BA); 3825 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
3327 3826
3328 ret = 0; 3827 ret = 0;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b864ebc6ab8f..43e5aadd7a89 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -564,7 +564,7 @@ ieee80211_get_vif_queues(struct ieee80211_local *local,
564{ 564{
565 unsigned int queues; 565 unsigned int queues;
566 566
567 if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { 567 if (sdata && ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
568 int ac; 568 int ac;
569 569
570 queues = 0; 570 queues = 0;
@@ -592,7 +592,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local,
592 * If no queue was set, or if the HW doesn't support 592 * If no queue was set, or if the HW doesn't support
593 * IEEE80211_HW_QUEUE_CONTROL - flush all queues 593 * IEEE80211_HW_QUEUE_CONTROL - flush all queues
594 */ 594 */
595 if (!queues || !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) 595 if (!queues || !ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
596 queues = ieee80211_get_vif_queues(local, sdata); 596 queues = ieee80211_get_vif_queues(local, sdata);
597 597
598 ieee80211_stop_queues_by_reason(&local->hw, queues, 598 ieee80211_stop_queues_by_reason(&local->hw, queues,
@@ -2046,7 +2046,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
2046 * about the sessions, but we and the AP still think they 2046 * about the sessions, but we and the AP still think they
2047 * are active. This is really a workaround though. 2047 * are active. This is really a workaround though.
2048 */ 2048 */
2049 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { 2049 if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
2050 mutex_lock(&local->sta_mtx); 2050 mutex_lock(&local->sta_mtx);
2051 2051
2052 list_for_each_entry(sta, &local->sta_list, list) { 2052 list_for_each_entry(sta, &local->sta_list, list) {
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 9d63d93c836e..943f7606527e 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -444,7 +444,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
444 hdr = (struct ieee80211_hdr *) pos; 444 hdr = (struct ieee80211_hdr *) pos;
445 pos += hdrlen; 445 pos += hdrlen;
446 446
447 pn64 = atomic64_inc_return(&key->u.ccmp.tx_pn); 447 pn64 = atomic64_inc_return(&key->conf.tx_pn);
448 448
449 pn[5] = pn64; 449 pn[5] = pn64;
450 pn[4] = pn64 >> 8; 450 pn[4] = pn64 >> 8;
@@ -670,7 +670,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
670 hdr = (struct ieee80211_hdr *)pos; 670 hdr = (struct ieee80211_hdr *)pos;
671 pos += hdrlen; 671 pos += hdrlen;
672 672
673 pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn); 673 pn64 = atomic64_inc_return(&key->conf.tx_pn);
674 674
675 pn[5] = pn64; 675 pn[5] = pn64;
676 pn[4] = pn64 >> 8; 676 pn[4] = pn64 >> 8;
@@ -940,7 +940,7 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
940 mmie->key_id = cpu_to_le16(key->conf.keyidx); 940 mmie->key_id = cpu_to_le16(key->conf.keyidx);
941 941
942 /* PN = PN + 1 */ 942 /* PN = PN + 1 */
943 pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); 943 pn64 = atomic64_inc_return(&key->conf.tx_pn);
944 944
945 bip_ipn_set64(mmie->sequence_number, pn64); 945 bip_ipn_set64(mmie->sequence_number, pn64);
946 946
@@ -984,7 +984,7 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
984 mmie->key_id = cpu_to_le16(key->conf.keyidx); 984 mmie->key_id = cpu_to_le16(key->conf.keyidx);
985 985
986 /* PN = PN + 1 */ 986 /* PN = PN + 1 */
987 pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); 987 pn64 = atomic64_inc_return(&key->conf.tx_pn);
988 988
989 bip_ipn_set64(mmie->sequence_number, pn64); 989 bip_ipn_set64(mmie->sequence_number, pn64);
990 990
@@ -1129,7 +1129,7 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
1129 mmie->key_id = cpu_to_le16(key->conf.keyidx); 1129 mmie->key_id = cpu_to_le16(key->conf.keyidx);
1130 1130
1131 /* PN = PN + 1 */ 1131 /* PN = PN + 1 */
1132 pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn); 1132 pn64 = atomic64_inc_return(&key->conf.tx_pn);
1133 1133
1134 bip_ipn_set64(mmie->sequence_number, pn64); 1134 bip_ipn_set64(mmie->sequence_number, pn64);
1135 1135
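
With the per-cipher tx_pn union members gone, all five encrypt paths in wpa.c draw their packet number from the single atomic64 in key->conf.tx_pn, which is also what the fast-xmit path in tx.c increments. What every call site now shares, as a one-line hypothetical helper:

/* Hypothetical helper: the next PN is cipher-independent. */
static inline u64 next_tx_pn(struct ieee80211_key_conf *conf)
{
	return atomic64_inc_return(&conf->tx_pn);
}
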
diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index aa462b480a39..fb45287ebac3 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig
@@ -2,6 +2,7 @@ config MAC802154
2 tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)" 2 tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)"
3 depends on IEEE802154 3 depends on IEEE802154
4 select CRC_CCITT 4 select CRC_CCITT
5 select CRYPTO
5 select CRYPTO_AUTHENC 6 select CRYPTO_AUTHENC
6 select CRYPTO_CCM 7 select CRYPTO_CCM
7 select CRYPTO_CTR 8 select CRYPTO_CTR
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 702d8b466317..17a51e8389e2 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,5 +1,7 @@
1obj-$(CONFIG_MAC802154) += mac802154.o 1obj-$(CONFIG_MAC802154) += mac802154.o
2mac802154-objs := main.o rx.o tx.o mac_cmd.o mib.o \ 2mac802154-objs := main.o rx.o tx.o mac_cmd.o mib.o \
3 iface.o llsec.o util.o cfg.o 3 iface.o llsec.o util.o cfg.o trace.o
4
5CFLAGS_trace.o := -I$(src)
4 6
5ccflags-y += -D__CHECK_ENDIAN__ 7ccflags-y += -D__CHECK_ENDIAN__
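
The CFLAGS_trace.o := -I$(src) line is the standard tracepoint arrangement: exactly one translation unit must define the events by setting CREATE_TRACE_POINTS before including the header, and that re-include needs the source directory on the include path. The new net/mac802154/trace.c is assumed to be essentially just:

/* Assumed contents: instantiate the tracepoints declared in trace.h
 * in exactly one object file. */
#define CREATE_TRACE_POINTS
#include "trace.h"
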
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 70be9c799f8a..317c4662e544 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -73,9 +73,9 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel)
73 73
74 ASSERT_RTNL(); 74 ASSERT_RTNL();
75 75
76 /* check if phy support this setting */ 76 if (wpan_phy->current_page == page &&
77 if (!(wpan_phy->channels_supported[page] & BIT(channel))) 77 wpan_phy->current_channel == channel)
78 return -EINVAL; 78 return 0;
79 79
80 ret = drv_set_channel(local, page, channel); 80 ret = drv_set_channel(local, page, channel);
81 if (!ret) { 81 if (!ret) {
@@ -95,9 +95,8 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy,
95 95
96 ASSERT_RTNL(); 96 ASSERT_RTNL();
97 97
98 /* check if phy support this setting */ 98 if (wpan_phy_cca_cmp(&wpan_phy->cca, cca))
99 if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) 99 return 0;
100 return -EOPNOTSUPP;
101 100
102 ret = drv_set_cca_mode(local, cca); 101 ret = drv_set_cca_mode(local, cca);
103 if (!ret) 102 if (!ret)
@@ -107,20 +106,49 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy,
107} 106}
108 107
109static int 108static int
109ieee802154_set_cca_ed_level(struct wpan_phy *wpan_phy, s32 ed_level)
110{
111 struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
112 int ret;
113
114 ASSERT_RTNL();
115
116 if (wpan_phy->cca_ed_level == ed_level)
117 return 0;
118
119 ret = drv_set_cca_ed_level(local, ed_level);
120 if (!ret)
121 wpan_phy->cca_ed_level = ed_level;
122
123 return ret;
124}
125
126static int
127ieee802154_set_tx_power(struct wpan_phy *wpan_phy, s32 power)
128{
129 struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
130 int ret;
131
132 ASSERT_RTNL();
133
134 if (wpan_phy->transmit_power == power)
135 return 0;
136
137 ret = drv_set_tx_power(local, power);
138 if (!ret)
139 wpan_phy->transmit_power = power;
140
141 return ret;
142}
143
144static int
110ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, 145ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
111 __le16 pan_id) 146 __le16 pan_id)
112{ 147{
113 ASSERT_RTNL(); 148 ASSERT_RTNL();
114 149
115 /* TODO 150 if (wpan_dev->pan_id == pan_id)
116 * I am not sure about to check here on broadcast pan_id. 151 return 0;
117 * Broadcast is a valid setting, comment from 802.15.4:
118 * If this value is 0xffff, the device is not associated.
119 *
120 * This could useful to simple deassociate an device.
121 */
122 if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
123 return -EINVAL;
124 152
125 wpan_dev->pan_id = pan_id; 153 wpan_dev->pan_id = pan_id;
126 return 0; 154 return 0;
@@ -131,12 +159,11 @@ ieee802154_set_backoff_exponent(struct wpan_phy *wpan_phy,
131 struct wpan_dev *wpan_dev, 159 struct wpan_dev *wpan_dev,
132 u8 min_be, u8 max_be) 160 u8 min_be, u8 max_be)
133{ 161{
134 struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
135
136 ASSERT_RTNL(); 162 ASSERT_RTNL();
137 163
138 if (!(local->hw.flags & IEEE802154_HW_CSMA_PARAMS)) 164 if (wpan_dev->min_be == min_be &&
139 return -EOPNOTSUPP; 165 wpan_dev->max_be == max_be)
166 return 0;
140 167
141 wpan_dev->min_be = min_be; 168 wpan_dev->min_be = min_be;
142 wpan_dev->max_be = max_be; 169 wpan_dev->max_be = max_be;
@@ -149,20 +176,8 @@ ieee802154_set_short_addr(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
149{ 176{
150 ASSERT_RTNL(); 177 ASSERT_RTNL();
151 178
152 /* TODO 179 if (wpan_dev->short_addr == short_addr)
153 * I am not sure about to check here on broadcast short_addr. 180 return 0;
154 * Broadcast is a valid setting, comment from 802.15.4:
155 * A value of 0xfffe indicates that the device has
156 * associated but has not been allocated an address. A
157 * value of 0xffff indicates that the device does not
158 * have a short address.
159 *
160 * I think we should allow to set these settings but
161 * don't allow to allow socket communication with it.
162 */
163 if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) ||
164 short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST))
165 return -EINVAL;
166 181
167 wpan_dev->short_addr = short_addr; 182 wpan_dev->short_addr = short_addr;
168 return 0; 183 return 0;
@@ -173,12 +188,10 @@ ieee802154_set_max_csma_backoffs(struct wpan_phy *wpan_phy,
173 struct wpan_dev *wpan_dev, 188 struct wpan_dev *wpan_dev,
174 u8 max_csma_backoffs) 189 u8 max_csma_backoffs)
175{ 190{
176 struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
177
178 ASSERT_RTNL(); 191 ASSERT_RTNL();
179 192
180 if (!(local->hw.flags & IEEE802154_HW_CSMA_PARAMS)) 193 if (wpan_dev->csma_retries == max_csma_backoffs)
181 return -EOPNOTSUPP; 194 return 0;
182 195
183 wpan_dev->csma_retries = max_csma_backoffs; 196 wpan_dev->csma_retries = max_csma_backoffs;
184 return 0; 197 return 0;
@@ -189,12 +202,10 @@ ieee802154_set_max_frame_retries(struct wpan_phy *wpan_phy,
189 struct wpan_dev *wpan_dev, 202 struct wpan_dev *wpan_dev,
190 s8 max_frame_retries) 203 s8 max_frame_retries)
191{ 204{
192 struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
193
194 ASSERT_RTNL(); 205 ASSERT_RTNL();
195 206
196 if (!(local->hw.flags & IEEE802154_HW_FRAME_RETRIES)) 207 if (wpan_dev->frame_retries == max_frame_retries)
197 return -EOPNOTSUPP; 208 return 0;
198 209
199 wpan_dev->frame_retries = max_frame_retries; 210 wpan_dev->frame_retries = max_frame_retries;
200 return 0; 211 return 0;
@@ -204,12 +215,10 @@ static int
204ieee802154_set_lbt_mode(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, 215ieee802154_set_lbt_mode(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
205 bool mode) 216 bool mode)
206{ 217{
207 struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
208
209 ASSERT_RTNL(); 218 ASSERT_RTNL();
210 219
211 if (!(local->hw.flags & IEEE802154_HW_LBT)) 220 if (wpan_dev->lbt == mode)
212 return -EOPNOTSUPP; 221 return 0;
213 222
214 wpan_dev->lbt = mode; 223 wpan_dev->lbt = mode;
215 return 0; 224 return 0;
@@ -222,6 +231,8 @@ const struct cfg802154_ops mac802154_config_ops = {
222 .del_virtual_intf = ieee802154_del_iface, 231 .del_virtual_intf = ieee802154_del_iface,
223 .set_channel = ieee802154_set_channel, 232 .set_channel = ieee802154_set_channel,
224 .set_cca_mode = ieee802154_set_cca_mode, 233 .set_cca_mode = ieee802154_set_cca_mode,
234 .set_cca_ed_level = ieee802154_set_cca_ed_level,
235 .set_tx_power = ieee802154_set_tx_power,
225 .set_pan_id = ieee802154_set_pan_id, 236 .set_pan_id = ieee802154_set_pan_id,
226 .set_short_addr = ieee802154_set_short_addr, 237 .set_short_addr = ieee802154_set_short_addr,
227 .set_backoff_exponent = ieee802154_set_backoff_exponent, 238 .set_backoff_exponent = ieee802154_set_backoff_exponent,
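
Every cfg.c setter now has the same shape: return 0 early when the requested value matches current state, otherwise call the driver and commit the new value only on success. Reduced to a sketch with placeholder names (drv_set_value()/wpan_phy->value stand in for the concrete pairs: channel, CCA mode, ED level, TX power):

static int set_value_idempotent(struct wpan_phy *wpan_phy, s32 value)
{
	struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
	int ret;

	ASSERT_RTNL();

	if (wpan_phy->value == value)	/* already set: nothing to do */
		return 0;

	ret = drv_set_value(local, value);
	if (!ret)			/* commit only if the driver agreed */
		wpan_phy->value = value;

	return ret;
}
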
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index a0533357b9ea..0550f3365e33 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -7,6 +7,7 @@
7#include <net/mac802154.h> 7#include <net/mac802154.h>
8 8
9#include "ieee802154_i.h" 9#include "ieee802154_i.h"
10#include "trace.h"
10 11
11static inline int 12static inline int
12drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb) 13drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb)
@@ -27,19 +28,25 @@ drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb)
27 28
28static inline int drv_start(struct ieee802154_local *local) 29static inline int drv_start(struct ieee802154_local *local)
29{ 30{
31 int ret;
32
30 might_sleep(); 33 might_sleep();
31 34
35 trace_802154_drv_start(local);
32 local->started = true; 36 local->started = true;
33 smp_mb(); 37 smp_mb();
34 38 ret = local->ops->start(&local->hw);
35 return local->ops->start(&local->hw); 39 trace_802154_drv_return_int(local, ret);
40 return ret;
36} 41}
37 42
38static inline void drv_stop(struct ieee802154_local *local) 43static inline void drv_stop(struct ieee802154_local *local)
39{ 44{
40 might_sleep(); 45 might_sleep();
41 46
47 trace_802154_drv_stop(local);
42 local->ops->stop(&local->hw); 48 local->ops->stop(&local->hw);
49 trace_802154_drv_return_void(local);
43 50
44 /* sync away all work on the tasklet before clearing started */ 51 /* sync away all work on the tasklet before clearing started */
45 tasklet_disable(&local->tasklet); 52 tasklet_disable(&local->tasklet);
@@ -53,13 +60,20 @@ static inline void drv_stop(struct ieee802154_local *local)
53static inline int 60static inline int
54drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) 61drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel)
55{ 62{
63 int ret;
64
56 might_sleep(); 65 might_sleep();
57 66
58 return local->ops->set_channel(&local->hw, page, channel); 67 trace_802154_drv_set_channel(local, page, channel);
68 ret = local->ops->set_channel(&local->hw, page, channel);
69 trace_802154_drv_return_int(local, ret);
70 return ret;
59} 71}
60 72
61static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) 73static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm)
62{ 74{
75 int ret;
76
63 might_sleep(); 77 might_sleep();
64 78
65 if (!local->ops->set_txpower) { 79 if (!local->ops->set_txpower) {
@@ -67,12 +81,17 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm)
67 return -EOPNOTSUPP; 81 return -EOPNOTSUPP;
68 } 82 }
69 83
70 return local->ops->set_txpower(&local->hw, dbm); 84 trace_802154_drv_set_tx_power(local, mbm);
85 ret = local->ops->set_txpower(&local->hw, mbm);
86 trace_802154_drv_return_int(local, ret);
87 return ret;
71} 88}
72 89
73static inline int drv_set_cca_mode(struct ieee802154_local *local, 90static inline int drv_set_cca_mode(struct ieee802154_local *local,
74 const struct wpan_phy_cca *cca) 91 const struct wpan_phy_cca *cca)
75{ 92{
93 int ret;
94
76 might_sleep(); 95 might_sleep();
77 96
78 if (!local->ops->set_cca_mode) { 97 if (!local->ops->set_cca_mode) {
@@ -80,11 +99,16 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local,
80 return -EOPNOTSUPP; 99 return -EOPNOTSUPP;
81 } 100 }
82 101
83 return local->ops->set_cca_mode(&local->hw, cca); 102 trace_802154_drv_set_cca_mode(local, cca);
103 ret = local->ops->set_cca_mode(&local->hw, cca);
104 trace_802154_drv_return_int(local, ret);
105 return ret;
84} 106}
85 107
86static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) 108static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode)
87{ 109{
110 int ret;
111
88 might_sleep(); 112 might_sleep();
89 113
90 if (!local->ops->set_lbt) { 114 if (!local->ops->set_lbt) {
@@ -92,12 +116,17 @@ static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode)
92 return -EOPNOTSUPP; 116 return -EOPNOTSUPP;
93 } 117 }
94 118
95 return local->ops->set_lbt(&local->hw, mode); 119 trace_802154_drv_set_lbt_mode(local, mode);
120 ret = local->ops->set_lbt(&local->hw, mode);
121 trace_802154_drv_return_int(local, ret);
122 return ret;
96} 123}
97 124
98static inline int 125static inline int
99drv_set_cca_ed_level(struct ieee802154_local *local, s32 ed_level) 126drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm)
100{ 127{
128 int ret;
129
101 might_sleep(); 130 might_sleep();
102 131
103 if (!local->ops->set_cca_ed_level) { 132 if (!local->ops->set_cca_ed_level) {
@@ -105,12 +134,16 @@ drv_set_cca_ed_level(struct ieee802154_local *local, s32 ed_level)
105 return -EOPNOTSUPP; 134 return -EOPNOTSUPP;
106 } 135 }
107 136
108 return local->ops->set_cca_ed_level(&local->hw, ed_level); 137 trace_802154_drv_set_cca_ed_level(local, mbm);
138 ret = local->ops->set_cca_ed_level(&local->hw, mbm);
139 trace_802154_drv_return_int(local, ret);
140 return ret;
109} 141}
110 142
111static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) 143static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id)
112{ 144{
113 struct ieee802154_hw_addr_filt filt; 145 struct ieee802154_hw_addr_filt filt;
146 int ret;
114 147
115 might_sleep(); 148 might_sleep();
116 149
@@ -121,14 +154,18 @@ static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id)
121 154
122 filt.pan_id = pan_id; 155 filt.pan_id = pan_id;
123 156
124 return local->ops->set_hw_addr_filt(&local->hw, &filt, 157 trace_802154_drv_set_pan_id(local, pan_id);
158 ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
125 IEEE802154_AFILT_PANID_CHANGED); 159 IEEE802154_AFILT_PANID_CHANGED);
160 trace_802154_drv_return_int(local, ret);
161 return ret;
126} 162}
127 163
128static inline int 164static inline int
129drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr) 165drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr)
130{ 166{
131 struct ieee802154_hw_addr_filt filt; 167 struct ieee802154_hw_addr_filt filt;
168 int ret;
132 169
133 might_sleep(); 170 might_sleep();
134 171
@@ -139,14 +176,18 @@ drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr)
139 176
140 filt.ieee_addr = extended_addr; 177 filt.ieee_addr = extended_addr;
141 178
142 return local->ops->set_hw_addr_filt(&local->hw, &filt, 179 trace_802154_drv_set_extended_addr(local, extended_addr);
180 ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
143 IEEE802154_AFILT_IEEEADDR_CHANGED); 181 IEEE802154_AFILT_IEEEADDR_CHANGED);
182 trace_802154_drv_return_int(local, ret);
183 return ret;
144} 184}
145 185
146static inline int 186static inline int
147drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) 187drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr)
148{ 188{
149 struct ieee802154_hw_addr_filt filt; 189 struct ieee802154_hw_addr_filt filt;
190 int ret;
150 191
151 might_sleep(); 192 might_sleep();
152 193
@@ -157,14 +198,18 @@ drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr)
157 198
158 filt.short_addr = short_addr; 199 filt.short_addr = short_addr;
159 200
160 return local->ops->set_hw_addr_filt(&local->hw, &filt, 201 trace_802154_drv_set_short_addr(local, short_addr);
202 ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
161 IEEE802154_AFILT_SADDR_CHANGED); 203 IEEE802154_AFILT_SADDR_CHANGED);
204 trace_802154_drv_return_int(local, ret);
205 return ret;
162} 206}
163 207
164static inline int 208static inline int
165drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) 209drv_set_pan_coord(struct ieee802154_local *local, bool is_coord)
166{ 210{
167 struct ieee802154_hw_addr_filt filt; 211 struct ieee802154_hw_addr_filt filt;
212 int ret;
168 213
169 might_sleep(); 214 might_sleep();
170 215
@@ -175,14 +220,19 @@ drv_set_pan_coord(struct ieee802154_local *local, bool is_coord)
175 220
176 filt.pan_coord = is_coord; 221 filt.pan_coord = is_coord;
177 222
178 return local->ops->set_hw_addr_filt(&local->hw, &filt, 223 trace_802154_drv_set_pan_coord(local, is_coord);
224 ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
179 IEEE802154_AFILT_PANC_CHANGED); 225 IEEE802154_AFILT_PANC_CHANGED);
226 trace_802154_drv_return_int(local, ret);
227 return ret;
180} 228}
181 229
182static inline int 230static inline int
183drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be, 231drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be,
184 u8 max_csma_backoffs) 232 u8 max_csma_backoffs)
185{ 233{
234 int ret;
235
186 might_sleep(); 236 might_sleep();
187 237
188 if (!local->ops->set_csma_params) { 238 if (!local->ops->set_csma_params) {
@@ -190,13 +240,19 @@ drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be,
190 return -EOPNOTSUPP; 240 return -EOPNOTSUPP;
191 } 241 }
192 242
193 return local->ops->set_csma_params(&local->hw, min_be, max_be, 243 trace_802154_drv_set_csma_params(local, min_be, max_be,
244 max_csma_backoffs);
245 ret = local->ops->set_csma_params(&local->hw, min_be, max_be,
194 max_csma_backoffs); 246 max_csma_backoffs);
247 trace_802154_drv_return_int(local, ret);
248 return ret;
195} 249}
196 250
197static inline int 251static inline int
198drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries) 252drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries)
199{ 253{
254 int ret;
255
200 might_sleep(); 256 might_sleep();
201 257
202 if (!local->ops->set_frame_retries) { 258 if (!local->ops->set_frame_retries) {
@@ -204,12 +260,17 @@ drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries)
204 return -EOPNOTSUPP; 260 return -EOPNOTSUPP;
205 } 261 }
206 262
207 return local->ops->set_frame_retries(&local->hw, max_frame_retries); 263 trace_802154_drv_set_max_frame_retries(local, max_frame_retries);
264 ret = local->ops->set_frame_retries(&local->hw, max_frame_retries);
265 trace_802154_drv_return_int(local, ret);
266 return ret;
208} 267}
209 268
210static inline int 269static inline int
211drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) 270drv_set_promiscuous_mode(struct ieee802154_local *local, bool on)
212{ 271{
272 int ret;
273
213 might_sleep(); 274 might_sleep();
214 275
215 if (!local->ops->set_promiscuous_mode) { 276 if (!local->ops->set_promiscuous_mode) {
@@ -217,7 +278,10 @@ drv_set_promiscuous_mode(struct ieee802154_local *local, bool on)
217 return -EOPNOTSUPP; 278 return -EOPNOTSUPP;
218 } 279 }
219 280
220 return local->ops->set_promiscuous_mode(&local->hw, on); 281 trace_802154_drv_set_promiscuous_mode(local, on);
282 ret = local->ops->set_promiscuous_mode(&local->hw, on);
283 trace_802154_drv_return_int(local, ret);
284 return ret;
221} 285}
222 286
223#endif /* __MAC802154_DRIVER_OPS */ 287#endif /* __MAC802154_DRIVER_OPS */
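
Every drv_*() wrapper in driver-ops.h now brackets the driver callback with a tracepoint: one event records the call and its arguments, and trace_802154_drv_return_int() (or _return_void) records the result. A condensed sketch of the wrapper shape, with a hypothetical op and event name:

	static inline int drv_example_op(struct ieee802154_local *local, u8 arg)
	{
		int ret;

		might_sleep();

		trace_802154_drv_example_op(local, arg);	/* hypothetical event */
		ret = local->ops->example_op(&local->hw, arg);	/* hypothetical op */
		trace_802154_drv_return_int(local, ret);
		return ret;
	}
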
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 127ba18386fc..34755d5751a4 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -86,16 +86,12 @@ struct ieee802154_sub_if_data {
86 unsigned long state; 86 unsigned long state;
87 char name[IFNAMSIZ]; 87 char name[IFNAMSIZ];
88 88
89 spinlock_t mib_lock;
90
91 /* protects sec from concurrent access by netlink. access by 89 /* protects sec from concurrent access by netlink. access by
92 * encrypt/decrypt/header_create safe without additional protection. 90 * encrypt/decrypt/header_create safe without additional protection.
93 */ 91 */
94 struct mutex sec_mtx; 92 struct mutex sec_mtx;
95 93
96 struct mac802154_llsec sec; 94 struct mac802154_llsec sec;
97 /* must be last, dynamically sized area in this! */
98 struct ieee802154_vif vif;
99}; 95};
100 96
101#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */ 97#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */
@@ -136,12 +132,7 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
136enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer); 132enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer);
137 133
138/* MIB callbacks */ 134/* MIB callbacks */
139void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val);
140__le16 mac802154_dev_get_short_addr(const struct net_device *dev);
141__le16 mac802154_dev_get_pan_id(const struct net_device *dev);
142void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val);
143void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); 135void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
144u8 mac802154_dev_get_dsn(const struct net_device *dev);
145 136
146int mac802154_get_params(struct net_device *dev, 137int mac802154_get_params(struct net_device *dev,
147 struct ieee802154_llsec_params *params); 138 struct ieee802154_llsec_params *params);
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 91b75abbd1a1..8b698246a51b 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -62,9 +62,10 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
62 (struct sockaddr_ieee802154 *)&ifr->ifr_addr; 62 (struct sockaddr_ieee802154 *)&ifr->ifr_addr;
63 int err = -ENOIOCTLCMD; 63 int err = -ENOIOCTLCMD;
64 64
65 ASSERT_RTNL(); 65 if (cmd != SIOCGIFADDR && cmd != SIOCSIFADDR)
66 return err;
66 67
67 spin_lock_bh(&sdata->mib_lock); 68 rtnl_lock();
68 69
69 switch (cmd) { 70 switch (cmd) {
70 case SIOCGIFADDR: 71 case SIOCGIFADDR:
@@ -89,7 +90,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
89 } 90 }
90 case SIOCSIFADDR: 91 case SIOCSIFADDR:
91 if (netif_running(dev)) { 92 if (netif_running(dev)) {
92 spin_unlock_bh(&sdata->mib_lock); 93 rtnl_unlock();
93 return -EBUSY; 94 return -EBUSY;
94 } 95 }
95 96
@@ -111,7 +112,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
111 break; 112 break;
112 } 113 }
113 114
114 spin_unlock_bh(&sdata->mib_lock); 115 rtnl_unlock();
115 return err; 116 return err;
116} 117}
117 118
@@ -125,7 +126,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
125 return -EBUSY; 126 return -EBUSY;
126 127
127 ieee802154_be64_to_le64(&extended_addr, addr->sa_data); 128 ieee802154_be64_to_le64(&extended_addr, addr->sa_data);
128 if (!ieee802154_is_valid_extended_addr(extended_addr)) 129 if (!ieee802154_is_valid_extended_unicast_addr(extended_addr))
129 return -EINVAL; 130 return -EINVAL;
130 131
131 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); 132 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
@@ -134,19 +135,72 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
134 return mac802154_wpan_update_llsec(dev); 135 return mac802154_wpan_update_llsec(dev);
135} 136}
136 137
138static int ieee802154_setup_hw(struct ieee802154_sub_if_data *sdata)
139{
140 struct ieee802154_local *local = sdata->local;
141 struct wpan_dev *wpan_dev = &sdata->wpan_dev;
142 int ret;
143
144 if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) {
145 ret = drv_set_promiscuous_mode(local,
146 wpan_dev->promiscuous_mode);
147 if (ret < 0)
148 return ret;
149 }
150
151 if (local->hw.flags & IEEE802154_HW_AFILT) {
152 ret = drv_set_pan_id(local, wpan_dev->pan_id);
153 if (ret < 0)
154 return ret;
155
156 ret = drv_set_extended_addr(local, wpan_dev->extended_addr);
157 if (ret < 0)
158 return ret;
159
160 ret = drv_set_short_addr(local, wpan_dev->short_addr);
161 if (ret < 0)
162 return ret;
163 }
164
165 if (local->hw.flags & IEEE802154_HW_LBT) {
166 ret = drv_set_lbt_mode(local, wpan_dev->lbt);
167 if (ret < 0)
168 return ret;
169 }
170
171 if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) {
172 ret = drv_set_csma_params(local, wpan_dev->min_be,
173 wpan_dev->max_be,
174 wpan_dev->csma_retries);
175 if (ret < 0)
176 return ret;
177 }
178
179 if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) {
180 ret = drv_set_max_frame_retries(local, wpan_dev->frame_retries);
181 if (ret < 0)
182 return ret;
183 }
184
185 return 0;
186}
187
137static int mac802154_slave_open(struct net_device *dev) 188static int mac802154_slave_open(struct net_device *dev)
138{ 189{
139 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); 190 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
140 struct ieee802154_local *local = sdata->local; 191 struct ieee802154_local *local = sdata->local;
141 int res = 0; 192 int res;
142 193
143 ASSERT_RTNL(); 194 ASSERT_RTNL();
144 195
145 set_bit(SDATA_STATE_RUNNING, &sdata->state); 196 set_bit(SDATA_STATE_RUNNING, &sdata->state);
146 197
147 if (!local->open_count) { 198 if (!local->open_count) {
199 res = ieee802154_setup_hw(sdata);
200 if (res)
201 goto err;
202
148 res = drv_start(local); 203 res = drv_start(local);
149 WARN_ON(res);
150 if (res) 204 if (res)
151 goto err; 205 goto err;
152 } 206 }
@@ -218,8 +272,8 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata,
218 * exist really an use case if we need to support 272 * exist really an use case if we need to support
219 * multiple node types at the same time. 273 * multiple node types at the same time.
220 */ 274 */
221 if (sdata->vif.type == NL802154_IFTYPE_NODE && 275 if (wpan_dev->iftype == NL802154_IFTYPE_NODE &&
222 nsdata->vif.type == NL802154_IFTYPE_NODE) 276 nsdata->wpan_dev.iftype == NL802154_IFTYPE_NODE)
223 return -EBUSY; 277 return -EBUSY;
224 278
225 /* check all phy mac sublayer settings are the same. 279 /* check all phy mac sublayer settings are the same.
@@ -239,67 +293,13 @@ static int mac802154_wpan_open(struct net_device *dev)
239{ 293{
240 int rc; 294 int rc;
241 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); 295 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
242 struct ieee802154_local *local = sdata->local;
243 struct wpan_dev *wpan_dev = &sdata->wpan_dev; 296 struct wpan_dev *wpan_dev = &sdata->wpan_dev;
244 struct wpan_phy *phy = sdata->local->phy;
245 297
246 rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); 298 rc = ieee802154_check_concurrent_iface(sdata, wpan_dev->iftype);
247 if (rc < 0) 299 if (rc < 0)
248 return rc; 300 return rc;
249 301
250 rc = mac802154_slave_open(dev); 302 return mac802154_slave_open(dev);
251 if (rc < 0)
252 return rc;
253
254 mutex_lock(&phy->pib_lock);
255
256 if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) {
257 rc = drv_set_promiscuous_mode(local,
258 wpan_dev->promiscuous_mode);
259 if (rc < 0)
260 goto out;
261 }
262
263 if (local->hw.flags & IEEE802154_HW_AFILT) {
264 rc = drv_set_pan_id(local, wpan_dev->pan_id);
265 if (rc < 0)
266 goto out;
267
268 rc = drv_set_extended_addr(local, wpan_dev->extended_addr);
269 if (rc < 0)
270 goto out;
271
272 rc = drv_set_short_addr(local, wpan_dev->short_addr);
273 if (rc < 0)
274 goto out;
275 }
276
277 if (local->hw.flags & IEEE802154_HW_LBT) {
278 rc = drv_set_lbt_mode(local, wpan_dev->lbt);
279 if (rc < 0)
280 goto out;
281 }
282
283 if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) {
284 rc = drv_set_csma_params(local, wpan_dev->min_be,
285 wpan_dev->max_be,
286 wpan_dev->csma_retries);
287 if (rc < 0)
288 goto out;
289 }
290
291 if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) {
292 rc = drv_set_max_frame_retries(local, wpan_dev->frame_retries);
293 if (rc < 0)
294 goto out;
295 }
296
297 mutex_unlock(&phy->pib_lock);
298 return 0;
299
300out:
301 mutex_unlock(&phy->pib_lock);
302 return rc;
303} 303}
304 304
305static int mac802154_slave_close(struct net_device *dev) 305static int mac802154_slave_close(struct net_device *dev)
@@ -309,15 +309,16 @@ static int mac802154_slave_close(struct net_device *dev)
309 309
310 ASSERT_RTNL(); 310 ASSERT_RTNL();
311 311
312 hrtimer_cancel(&local->ifs_timer);
313
314 netif_stop_queue(dev); 312 netif_stop_queue(dev);
315 local->open_count--; 313 local->open_count--;
316 314
317 clear_bit(SDATA_STATE_RUNNING, &sdata->state); 315 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
318 316
319 if (!local->open_count) 317 if (!local->open_count) {
318 flush_workqueue(local->workqueue);
319 hrtimer_cancel(&local->ifs_timer);
320 drv_stop(local); 320 drv_stop(local);
321 }
321 322
322 return 0; 323 return 0;
323} 324}
@@ -374,14 +375,12 @@ static int mac802154_header_create(struct sk_buff *skb,
374 hdr.fc.type = cb->type; 375 hdr.fc.type = cb->type;
375 hdr.fc.security_enabled = cb->secen; 376 hdr.fc.security_enabled = cb->secen;
376 hdr.fc.ack_request = cb->ackreq; 377 hdr.fc.ack_request = cb->ackreq;
377 hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev); 378 hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF;
378 379
379 if (mac802154_set_header_security(sdata, &hdr, cb) < 0) 380 if (mac802154_set_header_security(sdata, &hdr, cb) < 0)
380 return -EINVAL; 381 return -EINVAL;
381 382
382 if (!saddr) { 383 if (!saddr) {
383 spin_lock_bh(&sdata->mib_lock);
384
385 if (wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) || 384 if (wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) ||
386 wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || 385 wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) ||
387 wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) { 386 wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) {
@@ -393,8 +392,6 @@ static int mac802154_header_create(struct sk_buff *skb,
393 } 392 }
394 393
395 hdr.source.pan_id = wpan_dev->pan_id; 394 hdr.source.pan_id = wpan_dev->pan_id;
396
397 spin_unlock_bh(&sdata->mib_lock);
398 } else { 395 } else {
399 hdr.source = *(const struct ieee802154_addr *)saddr; 396 hdr.source = *(const struct ieee802154_addr *)saddr;
400 } 397 }
@@ -474,13 +471,15 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
474 enum nl802154_iftype type) 471 enum nl802154_iftype type)
475{ 472{
476 struct wpan_dev *wpan_dev = &sdata->wpan_dev; 473 struct wpan_dev *wpan_dev = &sdata->wpan_dev;
474 u8 tmp;
477 475
478 /* set some type-dependent values */ 476 /* set some type-dependent values */
479 sdata->vif.type = type;
480 sdata->wpan_dev.iftype = type; 477 sdata->wpan_dev.iftype = type;
481 478
482 get_random_bytes(&wpan_dev->bsn, 1); 479 get_random_bytes(&tmp, sizeof(tmp));
483 get_random_bytes(&wpan_dev->dsn, 1); 480 atomic_set(&wpan_dev->bsn, tmp);
481 get_random_bytes(&tmp, sizeof(tmp));
482 atomic_set(&wpan_dev->dsn, tmp);
484 483
485 /* defaults per 802.15.4-2011 */ 484 /* defaults per 802.15.4-2011 */
486 wpan_dev->min_be = 3; 485 wpan_dev->min_be = 3;
@@ -503,7 +502,6 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
503 sdata->dev->ml_priv = &mac802154_mlme_wpan; 502 sdata->dev->ml_priv = &mac802154_mlme_wpan;
504 wpan_dev->promiscuous_mode = false; 503 wpan_dev->promiscuous_mode = false;
505 504
506 spin_lock_init(&sdata->mib_lock);
507 mutex_init(&sdata->sec_mtx); 505 mutex_init(&sdata->sec_mtx);
508 506
509 mac802154_llsec_init(&sdata->sec); 507 mac802154_llsec_init(&sdata->sec);
@@ -531,7 +529,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
531 529
532 ASSERT_RTNL(); 530 ASSERT_RTNL();
533 531
534 ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name, 532 ndev = alloc_netdev(sizeof(*sdata), name,
535 name_assign_type, ieee802154_if_setup); 533 name_assign_type, ieee802154_if_setup);
536 if (!ndev) 534 if (!ndev)
537 return ERR_PTR(-ENOMEM); 535 return ERR_PTR(-ENOMEM);
@@ -547,7 +545,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
547 switch (type) { 545 switch (type) {
548 case NL802154_IFTYPE_NODE: 546 case NL802154_IFTYPE_NODE:
549 ndev->type = ARPHRD_IEEE802154; 547 ndev->type = ARPHRD_IEEE802154;
550 if (ieee802154_is_valid_extended_addr(extended_addr)) 548 if (ieee802154_is_valid_extended_unicast_addr(extended_addr))
551 ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr); 549 ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr);
552 else 550 else
553 memcpy(ndev->dev_addr, ndev->perm_addr, 551 memcpy(ndev->dev_addr, ndev->perm_addr,
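
Two things stand out in the iface.c changes: the hardware programming that used to sit inline in mac802154_wpan_open() is consolidated into ieee802154_setup_hw(), and the DSN moves from a mib_lock-protected u8 to an atomic_t in wpan_dev, so header creation no longer needs a lock. A sketch of the lock-free sequence-number step used above, wrapped in a hypothetical helper:

	/* atomic_inc_return() yields a fresh counter value per frame;
	 * masking with 0xFF wraps it to the one-octet sequence number
	 * field of the 802.15.4 MAC header.
	 */
	static u8 example_next_dsn(struct wpan_dev *wpan_dev)
	{
		return atomic_inc_return(&wpan_dev->dsn) & 0xFF;
	}
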
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 5b2be12832e6..985e9394e2af 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -17,8 +17,9 @@
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/bug.h> 18#include <linux/bug.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/crypto.h>
20#include <linux/ieee802154.h> 21#include <linux/ieee802154.h>
21#include <crypto/algapi.h> 22#include <crypto/aead.h>
22 23
23#include "ieee802154_i.h" 24#include "ieee802154_i.h"
24#include "llsec.h" 25#include "llsec.h"
@@ -649,7 +650,7 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
649 u8 iv[16]; 650 u8 iv[16];
650 unsigned char *data; 651 unsigned char *data;
651 int authlen, assoclen, datalen, rc; 652 int authlen, assoclen, datalen, rc;
652 struct scatterlist src, assoc[2], dst[2]; 653 struct scatterlist sg;
653 struct aead_request *req; 654 struct aead_request *req;
654 655
655 authlen = ieee802154_sechdr_authtag_len(&hdr->sec); 656 authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
@@ -659,30 +660,23 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
659 if (!req) 660 if (!req)
660 return -ENOMEM; 661 return -ENOMEM;
661 662
662 sg_init_table(assoc, 2);
663 sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
664 assoclen = skb->mac_len; 663 assoclen = skb->mac_len;
665 664
666 data = skb_mac_header(skb) + skb->mac_len; 665 data = skb_mac_header(skb) + skb->mac_len;
667 datalen = skb_tail_pointer(skb) - data; 666 datalen = skb_tail_pointer(skb) - data;
668 667
669 if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) { 668 skb_put(skb, authlen);
670 sg_set_buf(&assoc[1], data, 0); 669
671 } else { 670 sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen + authlen);
672 sg_set_buf(&assoc[1], data, datalen); 671
672 if (!(hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC)) {
673 assoclen += datalen; 673 assoclen += datalen;
674 datalen = 0; 674 datalen = 0;
675 } 675 }
676 676
677 sg_init_one(&src, data, datalen);
678
679 sg_init_table(dst, 2);
680 sg_set_buf(&dst[0], data, datalen);
681 sg_set_buf(&dst[1], skb_put(skb, authlen), authlen);
682
683 aead_request_set_callback(req, 0, NULL, NULL); 677 aead_request_set_callback(req, 0, NULL, NULL);
684 aead_request_set_assoc(req, assoc, assoclen); 678 aead_request_set_crypt(req, &sg, &sg, datalen, iv);
685 aead_request_set_crypt(req, &src, dst, datalen, iv); 679 aead_request_set_ad(req, assoclen);
686 680
687 rc = crypto_aead_encrypt(req); 681 rc = crypto_aead_encrypt(req);
688 682
@@ -858,7 +852,7 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
858 u8 iv[16]; 852 u8 iv[16];
859 unsigned char *data; 853 unsigned char *data;
860 int authlen, datalen, assoclen, rc; 854 int authlen, datalen, assoclen, rc;
861 struct scatterlist src, assoc[2]; 855 struct scatterlist sg;
862 struct aead_request *req; 856 struct aead_request *req;
863 857
864 authlen = ieee802154_sechdr_authtag_len(&hdr->sec); 858 authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
@@ -868,27 +862,21 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
868 if (!req) 862 if (!req)
869 return -ENOMEM; 863 return -ENOMEM;
870 864
871 sg_init_table(assoc, 2);
872 sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
873 assoclen = skb->mac_len; 865 assoclen = skb->mac_len;
874 866
875 data = skb_mac_header(skb) + skb->mac_len; 867 data = skb_mac_header(skb) + skb->mac_len;
876 datalen = skb_tail_pointer(skb) - data; 868 datalen = skb_tail_pointer(skb) - data;
877 869
878 if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) { 870 sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen);
879 sg_set_buf(&assoc[1], data, 0); 871
880 } else { 872 if (!(hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC)) {
881 sg_set_buf(&assoc[1], data, datalen - authlen);
882 assoclen += datalen - authlen; 873 assoclen += datalen - authlen;
883 data += datalen - authlen;
884 datalen = authlen; 874 datalen = authlen;
885 } 875 }
886 876
887 sg_init_one(&src, data, datalen);
888
889 aead_request_set_callback(req, 0, NULL, NULL); 877 aead_request_set_callback(req, 0, NULL, NULL);
890 aead_request_set_assoc(req, assoc, assoclen); 878 aead_request_set_crypt(req, &sg, &sg, datalen, iv);
891 aead_request_set_crypt(req, &src, &src, datalen, iv); 879 aead_request_set_ad(req, assoclen);
892 880
893 rc = crypto_aead_decrypt(req); 881 rc = crypto_aead_decrypt(req);
894 882
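
The llsec rework above moves from the old two-scatterlist AEAD interface (separate assoc and src/dst lists via aead_request_set_assoc()) to the merged layout: one scatterlist covers the associated data, the payload, and (on encrypt) room for the auth tag, and aead_request_set_ad() marks how many leading bytes are associated data. The calling sequence, condensed from the encrypt path above and assuming req, iv, and the skb layout are already prepared:

	/* One sg spans header (AAD) || payload || tag room; the cipher
	 * runs in place, producing datalen + authlen bytes of output.
	 */
	skb_put(skb, authlen);			/* reserve room for the tag */
	sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen + authlen);
	aead_request_set_callback(req, 0, NULL, NULL);
	aead_request_set_crypt(req, &sg, &sg, datalen, iv);
	aead_request_set_ad(req, assoclen);
	rc = crypto_aead_encrypt(req);
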
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index bdccb4ecd30f..8606da459ff3 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -36,37 +36,30 @@ static int mac802154_mlme_start_req(struct net_device *dev,
36 u8 pan_coord, u8 blx, 36 u8 pan_coord, u8 blx,
37 u8 coord_realign) 37 u8 coord_realign)
38{ 38{
39 struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); 39 struct ieee802154_llsec_params params;
40 int rc = 0; 40 int changed = 0;
41 41
42 ASSERT_RTNL(); 42 ASSERT_RTNL();
43 43
44 BUG_ON(addr->mode != IEEE802154_ADDR_SHORT); 44 BUG_ON(addr->mode != IEEE802154_ADDR_SHORT);
45 45
46 mac802154_dev_set_pan_id(dev, addr->pan_id); 46 dev->ieee802154_ptr->pan_id = addr->pan_id;
47 mac802154_dev_set_short_addr(dev, addr->short_addr); 47 dev->ieee802154_ptr->short_addr = addr->short_addr;
48 mac802154_dev_set_page_channel(dev, page, channel); 48 mac802154_dev_set_page_channel(dev, page, channel);
49 49
50 if (ops->llsec) { 50 params.pan_id = addr->pan_id;
51 struct ieee802154_llsec_params params; 51 changed |= IEEE802154_LLSEC_PARAM_PAN_ID;
52 int changed = 0;
53 52
54 params.coord_shortaddr = addr->short_addr; 53 params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr);
55 changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR; 54 changed |= IEEE802154_LLSEC_PARAM_HWADDR;
56 55
57 params.pan_id = addr->pan_id; 56 params.coord_hwaddr = params.hwaddr;
58 changed |= IEEE802154_LLSEC_PARAM_PAN_ID; 57 changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR;
59 58
60 params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr); 59 params.coord_shortaddr = addr->short_addr;
61 changed |= IEEE802154_LLSEC_PARAM_HWADDR; 60 changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR;
62 61
63 params.coord_hwaddr = params.hwaddr; 62 return mac802154_set_params(dev, &params, changed);
64 changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR;
65
66 rc = ops->llsec->set_params(dev, &params, changed);
67 }
68
69 return rc;
70} 63}
71 64
72static int mac802154_set_mac_params(struct net_device *dev, 65static int mac802154_set_mac_params(struct net_device *dev,
@@ -91,19 +84,19 @@ static int mac802154_set_mac_params(struct net_device *dev,
91 wpan_dev->frame_retries = params->frame_retries; 84 wpan_dev->frame_retries = params->frame_retries;
92 wpan_dev->lbt = params->lbt; 85 wpan_dev->lbt = params->lbt;
93 86
94 if (local->hw.flags & IEEE802154_HW_TXPOWER) { 87 if (local->hw.phy->flags & WPAN_PHY_FLAG_TXPOWER) {
95 ret = drv_set_tx_power(local, params->transmit_power); 88 ret = drv_set_tx_power(local, params->transmit_power);
96 if (ret < 0) 89 if (ret < 0)
97 return ret; 90 return ret;
98 } 91 }
99 92
100 if (local->hw.flags & IEEE802154_HW_CCA_MODE) { 93 if (local->hw.phy->flags & WPAN_PHY_FLAG_CCA_MODE) {
101 ret = drv_set_cca_mode(local, &params->cca); 94 ret = drv_set_cca_mode(local, &params->cca);
102 if (ret < 0) 95 if (ret < 0)
103 return ret; 96 return ret;
104 } 97 }
105 98
106 if (local->hw.flags & IEEE802154_HW_CCA_ED_LEVEL) { 99 if (local->hw.phy->flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
107 ret = drv_set_cca_ed_level(local, params->cca_ed_level); 100 ret = drv_set_cca_ed_level(local, params->cca_ed_level);
108 if (ret < 0) 101 if (ret < 0)
109 return ret; 102 return ret;
@@ -151,9 +144,6 @@ static struct ieee802154_llsec_ops mac802154_llsec_ops = {
151 144
152struct ieee802154_mlme_ops mac802154_mlme_wpan = { 145struct ieee802154_mlme_ops mac802154_mlme_wpan = {
153 .start_req = mac802154_mlme_start_req, 146 .start_req = mac802154_mlme_start_req,
154 .get_pan_id = mac802154_dev_get_pan_id,
155 .get_short_addr = mac802154_dev_get_short_addr,
156 .get_dsn = mac802154_dev_get_dsn,
157 147
158 .llsec = &mac802154_llsec_ops, 148 .llsec = &mac802154_llsec_ops,
159 149
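
mac_cmd.c also switches its capability tests from the MAC-level hw.flags (the removed IEEE802154_HW_TXPOWER and friends) to PHY-level flags on the wpan_phy. From a driver's point of view the split might look like this; the flag combination is illustrative only:

	/* Hypothetical probe code: MAC features stay in hw->flags,
	 * PHY features move to hw->phy->flags.
	 */
	hw->flags = IEEE802154_HW_AFILT | IEEE802154_HW_CSMA_PARAMS;
	hw->phy->flags = WPAN_PHY_FLAG_TXPOWER |
			 WPAN_PHY_FLAG_CCA_MODE |
			 WPAN_PHY_FLAG_CCA_ED_LEVEL;
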
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 08cb32dc8fd3..356b346e1ee8 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -107,6 +107,18 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
107 107
108 skb_queue_head_init(&local->skb_queue); 108 skb_queue_head_init(&local->skb_queue);
109 109
110 /* init supported flags with 802.15.4 default ranges */
111 phy->supported.max_minbe = 8;
112 phy->supported.min_maxbe = 3;
113 phy->supported.max_maxbe = 8;
114 phy->supported.min_frame_retries = -1;
115 phy->supported.max_frame_retries = 7;
116 phy->supported.max_csma_backoffs = 5;
117 phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE;
118
119 /* always supported */
120 phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE);
121
110 return &local->hw; 122 return &local->hw;
111} 123}
112EXPORT_SYMBOL(ieee802154_alloc_hw); 124EXPORT_SYMBOL(ieee802154_alloc_hw);
@@ -155,6 +167,26 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
155 167
156 ieee802154_setup_wpan_phy_pib(local->phy); 168 ieee802154_setup_wpan_phy_pib(local->phy);
157 169
170 if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) {
171 local->phy->supported.min_csma_backoffs = 4;
172 local->phy->supported.max_csma_backoffs = 4;
173 local->phy->supported.min_maxbe = 5;
174 local->phy->supported.max_maxbe = 5;
175 local->phy->supported.min_minbe = 3;
176 local->phy->supported.max_minbe = 3;
177 }
178
179 if (!(hw->flags & IEEE802154_HW_FRAME_RETRIES)) {
180 /* TODO should be 3, but our default value is -1 which means
181 * no ARET handling.
182 */
183 local->phy->supported.min_frame_retries = -1;
184 local->phy->supported.max_frame_retries = -1;
185 }
186
187 if (hw->flags & IEEE802154_HW_PROMISCUOUS)
188 local->phy->supported.iftypes |= BIT(NL802154_IFTYPE_MONITOR);
189
158 rc = wpan_phy_register(local->phy); 190 rc = wpan_phy_register(local->phy);
159 if (rc < 0) 191 if (rc < 0)
160 goto out_wq; 192 goto out_wq;
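
main.c now seeds phy->supported with the 802.15.4-2011 default ranges at allocation time, then pins them to single values at registration for hardware that lacks the corresponding flags, so nl802154 can validate requests against honest bounds. A hypothetical driver that does support hardware retransmissions could adjust its advertised range between alloc and register, for example:

	/* Illustrative driver init; ops, priv size and the chosen
	 * range are assumptions, the API calls are real.
	 */
	hw = ieee802154_alloc_hw(sizeof(*priv), &example_ops);
	hw->flags = IEEE802154_HW_FRAME_RETRIES;
	hw->phy->supported.min_frame_retries = 0;
	hw->phy->supported.max_frame_retries = 7;
	err = ieee802154_register_hw(hw);
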
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 5cf019a57fd7..73f94fbf8785 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -26,81 +26,22 @@
26#include "ieee802154_i.h" 26#include "ieee802154_i.h"
27#include "driver-ops.h" 27#include "driver-ops.h"
28 28
29void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val)
30{
31 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
32
33 BUG_ON(dev->type != ARPHRD_IEEE802154);
34
35 spin_lock_bh(&sdata->mib_lock);
36 sdata->wpan_dev.short_addr = val;
37 spin_unlock_bh(&sdata->mib_lock);
38}
39
40__le16 mac802154_dev_get_short_addr(const struct net_device *dev)
41{
42 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
43 __le16 ret;
44
45 BUG_ON(dev->type != ARPHRD_IEEE802154);
46
47 spin_lock_bh(&sdata->mib_lock);
48 ret = sdata->wpan_dev.short_addr;
49 spin_unlock_bh(&sdata->mib_lock);
50
51 return ret;
52}
53
54__le16 mac802154_dev_get_pan_id(const struct net_device *dev)
55{
56 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
57 __le16 ret;
58
59 BUG_ON(dev->type != ARPHRD_IEEE802154);
60
61 spin_lock_bh(&sdata->mib_lock);
62 ret = sdata->wpan_dev.pan_id;
63 spin_unlock_bh(&sdata->mib_lock);
64
65 return ret;
66}
67
68void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val)
69{
70 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
71
72 BUG_ON(dev->type != ARPHRD_IEEE802154);
73
74 spin_lock_bh(&sdata->mib_lock);
75 sdata->wpan_dev.pan_id = val;
76 spin_unlock_bh(&sdata->mib_lock);
77}
78
79u8 mac802154_dev_get_dsn(const struct net_device *dev)
80{
81 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
82
83 BUG_ON(dev->type != ARPHRD_IEEE802154);
84
85 return sdata->wpan_dev.dsn++;
86}
87
88void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) 29void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
89{ 30{
90 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); 31 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
91 struct ieee802154_local *local = sdata->local; 32 struct ieee802154_local *local = sdata->local;
92 int res; 33 int res;
93 34
35 ASSERT_RTNL();
36
94 BUG_ON(dev->type != ARPHRD_IEEE802154); 37 BUG_ON(dev->type != ARPHRD_IEEE802154);
95 38
96 res = drv_set_channel(local, page, chan); 39 res = drv_set_channel(local, page, chan);
97 if (res) { 40 if (res) {
98 pr_debug("set_channel failed\n"); 41 pr_debug("set_channel failed\n");
99 } else { 42 } else {
100 mutex_lock(&local->phy->pib_lock);
101 local->phy->current_channel = chan; 43 local->phy->current_channel = chan;
102 local->phy->current_page = page; 44 local->phy->current_page = page;
103 mutex_unlock(&local->phy->pib_lock);
104 } 45 }
105} 46}
106 47
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c0d67b2b4132..d93ad2d4a4fc 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -47,8 +47,6 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
47 47
48 pr_debug("getting packet via slave interface %s\n", sdata->dev->name); 48 pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
49 49
50 spin_lock_bh(&sdata->mib_lock);
51
52 span = wpan_dev->pan_id; 50 span = wpan_dev->pan_id;
53 sshort = wpan_dev->short_addr; 51 sshort = wpan_dev->short_addr;
54 52
@@ -83,13 +81,10 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
83 skb->pkt_type = PACKET_OTHERHOST; 81 skb->pkt_type = PACKET_OTHERHOST;
84 break; 82 break;
85 default: 83 default:
86 spin_unlock_bh(&sdata->mib_lock);
87 pr_debug("invalid dest mode\n"); 84 pr_debug("invalid dest mode\n");
88 goto fail; 85 goto fail;
89 } 86 }
90 87
91 spin_unlock_bh(&sdata->mib_lock);
92
93 skb->dev = sdata->dev; 88 skb->dev = sdata->dev;
94 89
95 rc = mac802154_llsec_decrypt(&sdata->sec, skb); 90 rc = mac802154_llsec_decrypt(&sdata->sec, skb);
@@ -207,8 +202,10 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
207 } 202 }
208 203
209 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 204 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
210 if (sdata->vif.type != NL802154_IFTYPE_NODE || 205 if (sdata->wpan_dev.iftype != NL802154_IFTYPE_NODE)
211 !netif_running(sdata->dev)) 206 continue;
207
208 if (!ieee802154_sdata_running(sdata))
212 continue; 209 continue;
213 210
214 ieee802154_subif_frame(sdata, skb, &hdr); 211 ieee802154_subif_frame(sdata, skb, &hdr);
@@ -232,7 +229,7 @@ ieee802154_monitors_rx(struct ieee802154_local *local, struct sk_buff *skb)
232 skb->protocol = htons(ETH_P_IEEE802154); 229 skb->protocol = htons(ETH_P_IEEE802154);
233 230
234 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 231 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
235 if (sdata->vif.type != NL802154_IFTYPE_MONITOR) 232 if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR)
236 continue; 233 continue;
237 234
238 if (!ieee802154_sdata_running(sdata)) 235 if (!ieee802154_sdata_running(sdata))
diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c
new file mode 100644
index 000000000000..863e5e6b983d
--- /dev/null
+++ b/net/mac802154/trace.c
@@ -0,0 +1,9 @@
1#include <linux/module.h>
2
3#ifndef __CHECKER__
4#include <net/cfg802154.h>
5#include "driver-ops.h"
6#define CREATE_TRACE_POINTS
7#include "trace.h"
8
9#endif
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
new file mode 100644
index 000000000000..6f30e0c93a16
--- /dev/null
+++ b/net/mac802154/trace.h
@@ -0,0 +1,272 @@
1/* Based on net/mac80211/trace.h */
2
3#undef TRACE_SYSTEM
4#define TRACE_SYSTEM mac802154
5
6#if !defined(__MAC802154_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
7#define __MAC802154_DRIVER_TRACE
8
9#include <linux/tracepoint.h>
10
11#include <net/mac802154.h>
12#include "ieee802154_i.h"
13
14#define MAXNAME 32
15#define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME)
16#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \
17 wpan_phy_name(local->hw.phy), MAXNAME)
18#define LOCAL_PR_FMT "%s"
19#define LOCAL_PR_ARG __entry->wpan_phy_name
20
21#define CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \
22 __field(enum nl802154_cca_opts, cca_opt)
23#define CCA_ASSIGN \
24 do { \
25 (__entry->cca_mode) = cca->mode; \
26 (__entry->cca_opt) = cca->opt; \
27 } while (0)
28#define CCA_PR_FMT "cca_mode: %d, cca_opt: %d"
29#define CCA_PR_ARG __entry->cca_mode, __entry->cca_opt
30
31#define BOOL_TO_STR(bo) (bo) ? "true" : "false"
32
33/* Tracing for driver callbacks */
34
35DECLARE_EVENT_CLASS(local_only_evt,
36 TP_PROTO(struct ieee802154_local *local),
37 TP_ARGS(local),
38 TP_STRUCT__entry(
39 LOCAL_ENTRY
40 ),
41 TP_fast_assign(
42 LOCAL_ASSIGN;
43 ),
44 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
45);
46
47DEFINE_EVENT(local_only_evt, 802154_drv_return_void,
48 TP_PROTO(struct ieee802154_local *local),
49 TP_ARGS(local)
50);
51
52TRACE_EVENT(802154_drv_return_int,
53 TP_PROTO(struct ieee802154_local *local, int ret),
54 TP_ARGS(local, ret),
55 TP_STRUCT__entry(
56 LOCAL_ENTRY
57 __field(int, ret)
58 ),
59 TP_fast_assign(
60 LOCAL_ASSIGN;
61 __entry->ret = ret;
62 ),
63 TP_printk(LOCAL_PR_FMT ", returned: %d", LOCAL_PR_ARG,
64 __entry->ret)
65);
66
67DEFINE_EVENT(local_only_evt, 802154_drv_start,
68 TP_PROTO(struct ieee802154_local *local),
69 TP_ARGS(local)
70);
71
72DEFINE_EVENT(local_only_evt, 802154_drv_stop,
73 TP_PROTO(struct ieee802154_local *local),
74 TP_ARGS(local)
75);
76
77TRACE_EVENT(802154_drv_set_channel,
78 TP_PROTO(struct ieee802154_local *local, u8 page, u8 channel),
79 TP_ARGS(local, page, channel),
80 TP_STRUCT__entry(
81 LOCAL_ENTRY
82 __field(u8, page)
83 __field(u8, channel)
84 ),
85 TP_fast_assign(
86 LOCAL_ASSIGN;
87 __entry->page = page;
88 __entry->channel = channel;
89 ),
90 TP_printk(LOCAL_PR_FMT ", page: %d, channel: %d", LOCAL_PR_ARG,
91 __entry->page, __entry->channel)
92);
93
94TRACE_EVENT(802154_drv_set_cca_mode,
95 TP_PROTO(struct ieee802154_local *local,
96 const struct wpan_phy_cca *cca),
97 TP_ARGS(local, cca),
98 TP_STRUCT__entry(
99 LOCAL_ENTRY
100 CCA_ENTRY
101 ),
102 TP_fast_assign(
103 LOCAL_ASSIGN;
104 CCA_ASSIGN;
105 ),
106 TP_printk(LOCAL_PR_FMT ", " CCA_PR_FMT, LOCAL_PR_ARG,
107 CCA_PR_ARG)
108);
109
110TRACE_EVENT(802154_drv_set_cca_ed_level,
111 TP_PROTO(struct ieee802154_local *local, s32 mbm),
112 TP_ARGS(local, mbm),
113 TP_STRUCT__entry(
114 LOCAL_ENTRY
115 __field(s32, mbm)
116 ),
117 TP_fast_assign(
118 LOCAL_ASSIGN;
119 __entry->mbm = mbm;
120 ),
121 TP_printk(LOCAL_PR_FMT ", ed level: %d", LOCAL_PR_ARG,
122 __entry->mbm)
123);
124
125TRACE_EVENT(802154_drv_set_tx_power,
126 TP_PROTO(struct ieee802154_local *local, s32 power),
127 TP_ARGS(local, power),
128 TP_STRUCT__entry(
129 LOCAL_ENTRY
130 __field(s32, power)
131 ),
132 TP_fast_assign(
133 LOCAL_ASSIGN;
134 __entry->power = power;
135 ),
136 TP_printk(LOCAL_PR_FMT ", mbm: %d", LOCAL_PR_ARG,
137 __entry->power)
138);
139
140TRACE_EVENT(802154_drv_set_lbt_mode,
141 TP_PROTO(struct ieee802154_local *local, bool mode),
142 TP_ARGS(local, mode),
143 TP_STRUCT__entry(
144 LOCAL_ENTRY
145 __field(bool, mode)
146 ),
147 TP_fast_assign(
148 LOCAL_ASSIGN;
149 __entry->mode = mode;
150 ),
151 TP_printk(LOCAL_PR_FMT ", lbt mode: %s", LOCAL_PR_ARG,
152 BOOL_TO_STR(__entry->mode))
153);
154
155TRACE_EVENT(802154_drv_set_short_addr,
156 TP_PROTO(struct ieee802154_local *local, __le16 short_addr),
157 TP_ARGS(local, short_addr),
158 TP_STRUCT__entry(
159 LOCAL_ENTRY
160 __field(__le16, short_addr)
161 ),
162 TP_fast_assign(
163 LOCAL_ASSIGN;
164 __entry->short_addr = short_addr;
165 ),
166 TP_printk(LOCAL_PR_FMT ", short addr: 0x%04x", LOCAL_PR_ARG,
167 le16_to_cpu(__entry->short_addr))
168);
169
170TRACE_EVENT(802154_drv_set_pan_id,
171 TP_PROTO(struct ieee802154_local *local, __le16 pan_id),
172 TP_ARGS(local, pan_id),
173 TP_STRUCT__entry(
174 LOCAL_ENTRY
175 __field(__le16, pan_id)
176 ),
177 TP_fast_assign(
178 LOCAL_ASSIGN;
179 __entry->pan_id = pan_id;
180 ),
181 TP_printk(LOCAL_PR_FMT ", pan id: 0x%04x", LOCAL_PR_ARG,
182 le16_to_cpu(__entry->pan_id))
183);
184
185TRACE_EVENT(802154_drv_set_extended_addr,
186 TP_PROTO(struct ieee802154_local *local, __le64 extended_addr),
187 TP_ARGS(local, extended_addr),
188 TP_STRUCT__entry(
189 LOCAL_ENTRY
190 __field(__le64, extended_addr)
191 ),
192 TP_fast_assign(
193 LOCAL_ASSIGN;
194 __entry->extended_addr = extended_addr;
195 ),
196 TP_printk(LOCAL_PR_FMT ", extended addr: 0x%llx", LOCAL_PR_ARG,
197 le64_to_cpu(__entry->extended_addr))
198);
199
200TRACE_EVENT(802154_drv_set_pan_coord,
201 TP_PROTO(struct ieee802154_local *local, bool is_coord),
202 TP_ARGS(local, is_coord),
203 TP_STRUCT__entry(
204 LOCAL_ENTRY
205 __field(bool, is_coord)
206 ),
207 TP_fast_assign(
208 LOCAL_ASSIGN;
209 __entry->is_coord = is_coord;
210 ),
211 TP_printk(LOCAL_PR_FMT ", is_coord: %s", LOCAL_PR_ARG,
212 BOOL_TO_STR(__entry->is_coord))
213);
214
215TRACE_EVENT(802154_drv_set_csma_params,
216 TP_PROTO(struct ieee802154_local *local, u8 min_be, u8 max_be,
217 u8 max_csma_backoffs),
218 TP_ARGS(local, min_be, max_be, max_csma_backoffs),
219 TP_STRUCT__entry(
220 LOCAL_ENTRY
221 __field(u8, min_be)
222 __field(u8, max_be)
223 __field(u8, max_csma_backoffs)
224 ),
225 TP_fast_assign(
226 LOCAL_ASSIGN,
227 __entry->min_be = min_be;
228 __entry->max_be = max_be;
229 __entry->max_csma_backoffs = max_csma_backoffs;
230 ),
231 TP_printk(LOCAL_PR_FMT ", min be: %d, max be: %d, max csma backoffs: %d",
232 LOCAL_PR_ARG, __entry->min_be, __entry->max_be,
233 __entry->max_csma_backoffs)
234);
235
236TRACE_EVENT(802154_drv_set_max_frame_retries,
237 TP_PROTO(struct ieee802154_local *local, s8 max_frame_retries),
238 TP_ARGS(local, max_frame_retries),
239 TP_STRUCT__entry(
240 LOCAL_ENTRY
241 __field(s8, max_frame_retries)
242 ),
243 TP_fast_assign(
244 LOCAL_ASSIGN;
245 __entry->max_frame_retries = max_frame_retries;
246 ),
247 TP_printk(LOCAL_PR_FMT ", max frame retries: %d", LOCAL_PR_ARG,
248 __entry->max_frame_retries)
249);
250
251TRACE_EVENT(802154_drv_set_promiscuous_mode,
252 TP_PROTO(struct ieee802154_local *local, bool on),
253 TP_ARGS(local, on),
254 TP_STRUCT__entry(
255 LOCAL_ENTRY
256 __field(bool, on)
257 ),
258 TP_fast_assign(
259 LOCAL_ASSIGN;
260 __entry->on = on;
261 ),
262 TP_printk(LOCAL_PR_FMT ", promiscuous mode: %s", LOCAL_PR_ARG,
263 BOOL_TO_STR(__entry->on))
264);
265
266#endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
267
268#undef TRACE_INCLUDE_PATH
269#define TRACE_INCLUDE_PATH .
270#undef TRACE_INCLUDE_FILE
271#define TRACE_INCLUDE_FILE trace
272#include <trace/define_trace.h>
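
trace.h is a self-contained tracepoint header in the usual kernel idiom: it declares the events, and exactly one translation unit (trace.c above) defines them by setting CREATE_TRACE_POINTS before the include. Sketched:

	/* In exactly one .c file (here: trace.c) -- emits the bodies: */
	#define CREATE_TRACE_POINTS
	#include "trace.h"

	/* Everywhere else -- declarations only: */
	#include "trace.h"

	trace_802154_drv_start(local);	/* compiles to a disabled static
					 * branch unless the event is on */
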
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index 150bf807e572..583435f38930 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -85,11 +85,10 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
85 hrtimer_start(&local->ifs_timer, 85 hrtimer_start(&local->ifs_timer,
86 ktime_set(0, hw->phy->sifs_period * NSEC_PER_USEC), 86 ktime_set(0, hw->phy->sifs_period * NSEC_PER_USEC),
87 HRTIMER_MODE_REL); 87 HRTIMER_MODE_REL);
88
89 consume_skb(skb);
90 } else { 88 } else {
91 ieee802154_wake_queue(hw); 89 ieee802154_wake_queue(hw);
92 consume_skb(skb);
93 } 90 }
91
92 dev_consume_skb_any(skb);
94} 93}
95EXPORT_SYMBOL(ieee802154_xmit_complete); 94EXPORT_SYMBOL(ieee802154_xmit_complete);
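
Folding the two consume_skb() calls into a single dev_consume_skb_any() after the branch also changes the context rules: dev_consume_skb_any() is safe from hard-IRQ context, where plain consume_skb() is not. A purely illustrative sketch of why that matters, assuming a hypothetical driver that completes TX from its interrupt handler:

	static irqreturn_t example_tx_done_irq(int irq, void *dev_id)
	{
		struct example_priv *priv = dev_id;	/* hypothetical state */

		/* ends up in dev_consume_skb_any(), fine in hard IRQ */
		ieee802154_xmit_complete(priv->hw, priv->tx_skb, false);
		return IRQ_HANDLED;
	}
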
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 809df534a720..0183b32da942 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -62,6 +62,7 @@ out:
62 62
63static struct packet_offload mpls_mc_offload __read_mostly = { 63static struct packet_offload mpls_mc_offload __read_mostly = {
64 .type = cpu_to_be16(ETH_P_MPLS_MC), 64 .type = cpu_to_be16(ETH_P_MPLS_MC),
65 .priority = 15,
65 .callbacks = { 66 .callbacks = {
66 .gso_segment = mpls_gso_segment, 67 .gso_segment = mpls_gso_segment,
67 }, 68 },
@@ -69,6 +70,7 @@ static struct packet_offload mpls_mc_offload __read_mostly = {
69 70
70static struct packet_offload mpls_uc_offload __read_mostly = { 71static struct packet_offload mpls_uc_offload __read_mostly = {
71 .type = cpu_to_be16(ETH_P_MPLS_UC), 72 .type = cpu_to_be16(ETH_P_MPLS_UC),
73 .priority = 15,
72 .callbacks = { 74 .callbacks = {
73 .gso_segment = mpls_gso_segment, 75 .gso_segment = mpls_gso_segment,
74 }, 76 },
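
The new .priority field lets packet_offload entries be kept sorted at registration; dev_add_offload() inserts in ascending priority order, so handlers left at the default of 0 are walked before the MPLS ones at 15. Registering such an offload, with a hypothetical ethertype and callback:

	static struct packet_offload example_offload __read_mostly = {
		.type		= cpu_to_be16(ETH_P_EXAMPLE),	/* hypothetical */
		.priority	= 15,
		.callbacks	= {
			.gso_segment	= example_gso_segment,	/* hypothetical */
		},
	};

	dev_add_offload(&example_offload);
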
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a0f3e6a3c7d1..6eae69a698ed 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,6 +1,14 @@
1menu "Core Netfilter Configuration" 1menu "Core Netfilter Configuration"
2 depends on NET && INET && NETFILTER 2 depends on NET && INET && NETFILTER
3 3
4config NETFILTER_INGRESS
5 bool "Netfilter ingress support"
6 default y
7 select NET_INGRESS
8 help
9 This allows you to classify packets from ingress using the Netfilter
10 infrastructure.
11
4config NETFILTER_NETLINK 12config NETFILTER_NETLINK
5 tristate 13 tristate
6 14
@@ -198,7 +206,7 @@ config NF_CONNTRACK_FTP
198 206
199config NF_CONNTRACK_H323 207config NF_CONNTRACK_H323
200 tristate "H.323 protocol support" 208 tristate "H.323 protocol support"
201 depends on (IPV6 || IPV6=n) 209 depends on IPV6 || IPV6=n
202 depends on NETFILTER_ADVANCED 210 depends on NETFILTER_ADVANCED
203 help 211 help
204 H.323 is a VoIP signalling protocol from ITU-T. As one of the most 212 H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -448,6 +456,11 @@ config NF_TABLES_INET
448 help 456 help
449 This option enables support for a mixed IPv4/IPv6 "inet" table. 457 This option enables support for a mixed IPv4/IPv6 "inet" table.
450 458
459config NF_TABLES_NETDEV
460 tristate "Netfilter nf_tables netdev tables support"
461 help
462 This option enables support for the "netdev" table.
463
451config NFT_EXTHDR 464config NFT_EXTHDR
452 tristate "Netfilter nf_tables IPv6 exthdr module" 465 tristate "Netfilter nf_tables IPv6 exthdr module"
453 help 466 help
@@ -710,7 +723,7 @@ config NETFILTER_XT_TARGET_HL
710 723
711config NETFILTER_XT_TARGET_HMARK 724config NETFILTER_XT_TARGET_HMARK
712 tristate '"HMARK" target support' 725 tristate '"HMARK" target support'
713 depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) 726 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
714 depends on NETFILTER_ADVANCED 727 depends on NETFILTER_ADVANCED
715 ---help--- 728 ---help---
716 This option adds the "HMARK" target. 729 This option adds the "HMARK" target.
@@ -852,7 +865,7 @@ config NETFILTER_XT_TARGET_REDIRECT
852config NETFILTER_XT_TARGET_TEE 865config NETFILTER_XT_TARGET_TEE
853 tristate '"TEE" - packet cloning to alternate destination' 866 tristate '"TEE" - packet cloning to alternate destination'
854 depends on NETFILTER_ADVANCED 867 depends on NETFILTER_ADVANCED
855 depends on (IPV6 || IPV6=n) 868 depends on IPV6 || IPV6=n
856 depends on !NF_CONNTRACK || NF_CONNTRACK 869 depends on !NF_CONNTRACK || NF_CONNTRACK
857 ---help--- 870 ---help---
858 This option adds a "TEE" target with which a packet can be cloned and 871 This option adds a "TEE" target with which a packet can be cloned and
@@ -862,8 +875,8 @@ config NETFILTER_XT_TARGET_TPROXY
862 tristate '"TPROXY" target transparent proxying support' 875 tristate '"TPROXY" target transparent proxying support'
863 depends on NETFILTER_XTABLES 876 depends on NETFILTER_XTABLES
864 depends on NETFILTER_ADVANCED 877 depends on NETFILTER_ADVANCED
865 depends on (IPV6 || IPV6=n) 878 depends on IPV6 || IPV6=n
866 depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) 879 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
867 depends on IP_NF_MANGLE 880 depends on IP_NF_MANGLE
868 select NF_DEFRAG_IPV4 881 select NF_DEFRAG_IPV4
869 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 882 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
@@ -902,7 +915,7 @@ config NETFILTER_XT_TARGET_SECMARK
902 915
903config NETFILTER_XT_TARGET_TCPMSS 916config NETFILTER_XT_TARGET_TCPMSS
904 tristate '"TCPMSS" target support' 917 tristate '"TCPMSS" target support'
905 depends on (IPV6 || IPV6=n) 918 depends on IPV6 || IPV6=n
906 default m if NETFILTER_ADVANCED=n 919 default m if NETFILTER_ADVANCED=n
907 ---help--- 920 ---help---
908 This option adds a `TCPMSS' target, which allows you to alter the 921 This option adds a `TCPMSS' target, which allows you to alter the
@@ -1114,7 +1127,7 @@ config NETFILTER_XT_MATCH_ESP
1114 1127
1115config NETFILTER_XT_MATCH_HASHLIMIT 1128config NETFILTER_XT_MATCH_HASHLIMIT
1116 tristate '"hashlimit" match support' 1129 tristate '"hashlimit" match support'
1117 depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) 1130 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
1118 depends on NETFILTER_ADVANCED 1131 depends on NETFILTER_ADVANCED
1119 help 1132 help
1120 This option adds a `hashlimit' match. 1133 This option adds a `hashlimit' match.
@@ -1356,8 +1369,8 @@ config NETFILTER_XT_MATCH_SOCKET
1356 depends on NETFILTER_XTABLES 1369 depends on NETFILTER_XTABLES
1357 depends on NETFILTER_ADVANCED 1370 depends on NETFILTER_ADVANCED
1358 depends on !NF_CONNTRACK || NF_CONNTRACK 1371 depends on !NF_CONNTRACK || NF_CONNTRACK
1359 depends on (IPV6 || IPV6=n) 1372 depends on IPV6 || IPV6=n
1360 depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) 1373 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
1361 select NF_DEFRAG_IPV4 1374 select NF_DEFRAG_IPV4
1362 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 1375 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
1363 help 1376 help
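
Besides the two new options, these Kconfig hunks only drop redundant parentheses around the recurring "FOO || FOO=n" idiom. The idiom is worth spelling out: it is satisfied when FOO is disabled, but evaluates to m when FOO=m, which caps the dependent symbol at m and so keeps a built-in (=y) user from linking against a modular IPv6. A hypothetical symbol using it:

	config EXAMPLE_MATCH
		tristate '"example" match support'
		# y requires IPV6=y or IPV6=n; IPV6=m caps this symbol at m
		depends on IPV6 || IPV6=n
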
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a87d8b8ec730..70d026d46fe7 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -75,6 +75,7 @@ nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
75 75
76obj-$(CONFIG_NF_TABLES) += nf_tables.o 76obj-$(CONFIG_NF_TABLES) += nf_tables.o
77obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o 77obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o
78obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
78obj-$(CONFIG_NFT_COMPAT) += nft_compat.o 79obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
79obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o 80obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
80obj-$(CONFIG_NFT_META) += nft_meta.o 81obj-$(CONFIG_NFT_META) += nft_meta.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e6163017c42d..a0e54974e2c9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
64 64
65int nf_register_hook(struct nf_hook_ops *reg) 65int nf_register_hook(struct nf_hook_ops *reg)
66{ 66{
67 struct list_head *nf_hook_list;
67 struct nf_hook_ops *elem; 68 struct nf_hook_ops *elem;
68 69
69 mutex_lock(&nf_hook_mutex); 70 mutex_lock(&nf_hook_mutex);
70 list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { 71 switch (reg->pf) {
72 case NFPROTO_NETDEV:
73#ifdef CONFIG_NETFILTER_INGRESS
74 if (reg->hooknum == NF_NETDEV_INGRESS) {
75 BUG_ON(reg->dev == NULL);
76 nf_hook_list = &reg->dev->nf_hooks_ingress;
77 net_inc_ingress_queue();
78 break;
79 }
80#endif
81 /* Fall through. */
82 default:
83 nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
84 break;
85 }
86
87 list_for_each_entry(elem, nf_hook_list, list) {
71 if (reg->priority < elem->priority) 88 if (reg->priority < elem->priority)
72 break; 89 break;
73 } 90 }
@@ -85,10 +102,23 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
85 mutex_lock(&nf_hook_mutex); 102 mutex_lock(&nf_hook_mutex);
86 list_del_rcu(&reg->list); 103 list_del_rcu(&reg->list);
87 mutex_unlock(&nf_hook_mutex); 104 mutex_unlock(&nf_hook_mutex);
105 switch (reg->pf) {
106 case NFPROTO_NETDEV:
107#ifdef CONFIG_NETFILTER_INGRESS
108 if (reg->hooknum == NF_NETDEV_INGRESS) {
109 net_dec_ingress_queue();
110 break;
111 }
112 break;
113#endif
114 default:
115 break;
116 }
88#ifdef HAVE_JUMP_LABEL 117#ifdef HAVE_JUMP_LABEL
89 static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); 118 static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
90#endif 119#endif
91 synchronize_net(); 120 synchronize_net();
121 nf_queue_nf_hook_drop(reg);
92} 122}
93EXPORT_SYMBOL(nf_unregister_hook); 123EXPORT_SYMBOL(nf_unregister_hook);
94 124
@@ -166,11 +196,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
166 /* We may already have this, but read-locks nest anyway */ 196 /* We may already have this, but read-locks nest anyway */
167 rcu_read_lock(); 197 rcu_read_lock();
168 198
169 elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], 199 elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
170 struct nf_hook_ops, list);
171next_hook: 200next_hook:
172 verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, 201 verdict = nf_iterate(state->hook_list, skb, state, &elem);
173 &elem);
174 if (verdict == NF_ACCEPT || verdict == NF_STOP) { 202 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
175 ret = 1; 203 ret = 1;
176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { 204 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
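
core.c is what makes CONFIG_NETFILTER_INGRESS work: for NFPROTO_NETDEV hooks at NF_NETDEV_INGRESS, registration attaches the ops to the target device's nf_hooks_ingress list (and bumps the ingress static key) instead of the global nf_hooks table, and nf_hook_slow() now iterates whatever list was passed in via state->hook_list. A sketch of registering such a device-bound hook; the function body and device are illustrative, and the hookfn signature is the one from this kernel generation:

	static unsigned int
	example_ingress_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
			     const struct nf_hook_state *state)
	{
		return NF_ACCEPT;	/* hypothetical: pass everything */
	}

	static struct nf_hook_ops example_ingress_ops = {
		.hook		= example_ingress_hook,
		.pf		= NFPROTO_NETDEV,
		.hooknum	= NF_NETDEV_INGRESS,
		.priority	= 0,
	};

	/* dev must be set so the ops land on dev->nf_hooks_ingress */
	example_ingress_ops.dev = dev;
	err = nf_register_hook(&example_ingress_ops);
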
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 6f024a8a1534..d05e759ed0fa 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	struct mtype *map = set->data;
 
 	init_timer(&map->gc);
-	map->gc.data = (unsigned long) set;
+	map->gc.data = (unsigned long)set;
 	map->gc.function = gc;
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
@@ -144,10 +144,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (ret == IPSET_ADD_FAILED) {
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(x, set)))
+		    ip_set_timeout_expired(ext_timeout(x, set))) {
 			ret = 0;
-		else if (!(flags & IPSET_FLAG_EXIST))
+		} else if (!(flags & IPSET_FLAG_EXIST)) {
+			set_bit(e->id, map->members);
 			return -IPSET_ERR_EXIST;
+		}
 		/* Element is re-added, cleanup extensions */
 		ip_set_ext_destroy(set, x);
 	}
@@ -165,6 +167,10 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		ip_set_init_comment(ext_comment(x, set), ext);
 	if (SET_WITH_SKBINFO(set))
 		ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
+
+	/* Activate element */
+	set_bit(e->id, map->members);
+
 	return 0;
 }
 
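mtype_add() now separates building an element from publishing it: every requested extension is initialized first, and only the final set_bit() makes the id visible; the re-add error path likewise re-sets the bit only once the decision is made. Combined with the rcu_read_lock() added to mtype_list() below, a lockless reader that observes the bit can no longer see half-initialized extension data. Condensed ordering, names taken from the hunk:

/* Publish-after-init order used by mtype_add() above (sketch): */
if (SET_WITH_COMMENT(set))
	ip_set_init_comment(ext_comment(x, set), ext);
if (SET_WITH_SKBINFO(set))
	ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
/* ... counter and timeout extensions are initialized the same way ... */
set_bit(e->id, map->members);	/* activate the element last */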
@@ -203,10 +209,13 @@ mtype_list(const struct ip_set *set,
 	struct nlattr *adt, *nested;
 	void *x;
 	u32 id, first = cb->args[IPSET_CB_ARG0];
+	int ret = 0;
 
 	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!adt)
 		return -EMSGSIZE;
+	/* Extensions may be replaced */
+	rcu_read_lock();
 	for (; cb->args[IPSET_CB_ARG0] < map->elements;
 	     cb->args[IPSET_CB_ARG0]++) {
 		id = cb->args[IPSET_CB_ARG0];
@@ -214,7 +223,7 @@ mtype_list(const struct ip_set *set,
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
-		     mtype_is_filled((const struct mtype_elem *) x) &&
+		     mtype_is_filled((const struct mtype_elem *)x) &&
 #endif
 		     ip_set_timeout_expired(ext_timeout(x, set))))
 			continue;
@@ -222,14 +231,16 @@ mtype_list(const struct ip_set *set,
 		if (!nested) {
 			if (id == first) {
 				nla_nest_cancel(skb, adt);
-				return -EMSGSIZE;
-			} else
-				goto nla_put_failure;
+				ret = -EMSGSIZE;
+				goto out;
+			}
+
+			goto nla_put_failure;
 		}
 		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, x,
-		    mtype_is_filled((const struct mtype_elem *) x)))
+		    mtype_is_filled((const struct mtype_elem *)x)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
@@ -238,29 +249,32 @@ mtype_list(const struct ip_set *set,
 	/* Set listing finished */
 	cb->args[IPSET_CB_ARG0] = 0;
 
-	return 0;
+	goto out;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
 	if (unlikely(id == first)) {
 		cb->args[IPSET_CB_ARG0] = 0;
-		return -EMSGSIZE;
+		ret = -EMSGSIZE;
 	}
 	ipset_nest_end(skb, adt);
-	return 0;
+out:
+	rcu_read_unlock();
+	return ret;
 }
 
 static void
 mtype_gc(unsigned long ul_set)
 {
-	struct ip_set *set = (struct ip_set *) ul_set;
+	struct ip_set *set = (struct ip_set *)ul_set;
 	struct mtype *map = set->data;
 	void *x;
 	u32 id;
 
 	/* We run parallel with other readers (test element)
-	 * but adding/deleting new entries is locked out */
-	read_lock_bh(&set->lock);
+	 * but adding/deleting new entries is locked out
+	 */
+	spin_lock_bh(&set->lock);
 	for (id = 0; id < map->elements; id++)
 		if (mtype_gc_test(id, map, set->dsize)) {
 			x = get_ext(set, map, id);
@@ -269,7 +283,7 @@ mtype_gc(unsigned long ul_set)
 			ip_set_ext_destroy(set, x);
 		}
 	}
-	read_unlock_bh(&set->lock);
+	spin_unlock_bh(&set->lock);
 
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
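The garbage collector is a writer: it destroys expired elements and their extensions, so the old read_lock_bh() is replaced by the set's new spinlock, while packet-path lookups move to RCU (see the ip_set_core.c hunks below). The writer-side shape, per the hunks above:

/* Writer side after the rwlock -> spinlock + RCU conversion (sketch;
 * the middle of the loop body is elided between the two hunks above).
 */
spin_lock_bh(&set->lock);	/* gc modifies the set: full writer lock */
for (id = 0; id < map->elements; id++)
	if (mtype_gc_test(id, map, set->dsize)) {
		x = get_ext(set, map, id);
		/* ... expired element is unpublished here ... */
		ip_set_ext_destroy(set, x);
	}
spin_unlock_bh(&set->lock);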
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 55b083ec587a..64a564334418 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip");
 
 #define MTYPE		bitmap_ip
+#define HOST_MASK	32
 
 /* Type structure */
 struct bitmap_ip {
@@ -58,7 +59,7 @@ struct bitmap_ip_adt_elem {
 static inline u32
 ip_to_id(const struct bitmap_ip *m, u32 ip)
 {
-	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
+	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
 }
 
 /* Common functions */
@@ -80,7 +81,7 @@ static inline int
 bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
 		 u32 flags, size_t dsize)
 {
-	return !!test_and_set_bit(e->id, map->members);
+	return !!test_bit(e->id, map->members);
 }
 
 static inline int
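bitmap_ip_do_add() dropping test_and_set_bit() is part of the same rework: the type helper now only reports membership, and mtype_add() in ip_set_bitmap_gen.h flips the bit exactly once, after the element is fully initialized. In condensed form:

/* Division of labor after the change (sketch, core side simplified): */
ret = mtype_do_add(e, map, flags, set->dsize);	/* 1 = already a member */
/* ... core initializes the extensions ... */
set_bit(e->id, map->members);			/* core publishes once  */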
@@ -137,20 +138,17 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret = 0;
 
-	if (unlikely(!tb[IPSET_ATTR_IP] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
-		return -IPSET_ERR_PROTOCOL;
-
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	if (unlikely(!tb[IPSET_ATTR_IP]))
+		return -IPSET_ERR_PROTOCOL;
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
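The uadt parser loses its long open-coded attribute checklist: only the attribute this type consumes itself (IPSET_ATTR_IP) is validated here, while the generic extension attributes (timeout, counters, skbinfo, ...) are now checked once inside ip_set_get_extensions() — see the ip_set_core.c hunk further down. The resulting parse order:

/* Parse order in a type's uadt handler after the cleanup (sketch): */
if (unlikely(!tb[IPSET_ATTR_IP]))		/* type-specific attribute */
	return -IPSET_ERR_PROTOCOL;

ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
	return ret;

ret = ip_set_get_extensions(set, tb, &ext);	/* common attrs validated
						 * centrally in here now  */
if (ret)
	return ret;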
@@ -174,11 +172,12 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else if (tb[IPSET_ATTR_CIDR]) {
 		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
-		if (!cidr || cidr > 32)
+		if (!cidr || cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(ip, ip_to, cidr);
-	} else
+	} else {
 		ip_to = ip;
+	}
 
 	if (ip_to > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
@@ -189,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -277,16 +276,17 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	} else if (tb[IPSET_ATTR_CIDR]) {
 		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
-		if (cidr >= 32)
+		if (cidr >= HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(first_ip, last_ip, cidr);
-	} else
+	} else {
 		return -IPSET_ERR_PROTOCOL;
+	}
 
 	if (tb[IPSET_ATTR_NETMASK]) {
 		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
 
-		if (netmask > 32)
+		if (netmask > HOST_MASK)
 			return -IPSET_ERR_INVALID_NETMASK;
 
 		first_ip &= ip_set_hostmask(netmask);
@@ -360,7 +360,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 	[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 	[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+				    .len  = IPSET_MAX_COMMENT_SIZE },
 	[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 	[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 	[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
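Adding .len to the NLA_NUL_STRING policy entry makes nla_parse() reject over-long comment attributes up front, instead of trusting every caller to bound the string. The pattern, with a hypothetical attribute-max constant:

/* Bounded string attribute in a netlink policy (EXAMPLE_MAX is
 * hypothetical; the entry itself mirrors the hunk above).
 */
static const struct nla_policy example_policy[EXAMPLE_MAX + 1] = {
	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
				    .len  = IPSET_MAX_COMMENT_SIZE },
};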
@@ -377,6 +378,7 @@ bitmap_ip_init(void)
 static void __exit
 bitmap_ip_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&bitmap_ip_type);
 }
 
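All three bitmap modules gain the same exit-time rcu_barrier(). With element destruction deferred through call_rcu(), unloading must wait until every queued callback has run, otherwise the callback code would disappear with the module text. The pattern:

static void __exit example_fini(void)
{
	/* Flush all call_rcu() callbacks queued by this module before
	 * its code can be unloaded (pattern of the three fini hunks).
	 */
	rcu_barrier();
	ip_set_type_unregister(&bitmap_ip_type);
}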
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 86104744b00f..1430535118fb 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip,mac");
 
 #define MTYPE		bitmap_ipmac
+#define HOST_MASK	32
 #define IP_SET_BITMAP_STORED_TIMEOUT
 
 enum {
@@ -89,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 		return 0;
 	elem = get_elem(map->extensions, e->id, dsize);
 	if (elem->filled == MAC_FILLED)
-		return e->ether == NULL ||
+		return !e->ether ||
 		       ether_addr_equal(e->ether, elem->ether);
 	/* Trigger kernel to fill out the ethernet address */
 	return -EAGAIN;
@@ -130,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 	/* If MAC is unset yet, we store plain timeout value
 	 * because the timer is not activated yet
 	 * and we can reuse it later when MAC is filled out,
-	 * possibly by the kernel */
+	 * possibly by the kernel
+	 */
 	if (e->ether)
 		ip_set_timeout_set(timeout, t);
 	else
@@ -146,28 +148,35 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 	struct bitmap_ipmac_elem *elem;
 
 	elem = get_elem(map->extensions, e->id, dsize);
-	if (test_and_set_bit(e->id, map->members)) {
+	if (test_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
-			if (e->ether && (flags & IPSET_FLAG_EXIST))
-				memcpy(elem->ether, e->ether, ETH_ALEN);
+			if (e->ether &&
+			    (flags & IPSET_FLAG_EXIST) &&
+			    !ether_addr_equal(e->ether, elem->ether)) {
+				/* memcpy isn't atomic */
+				clear_bit(e->id, map->members);
+				smp_mb__after_atomic();
+				ether_addr_copy(elem->ether, e->ether);
+			}
 			return IPSET_ADD_FAILED;
 		} else if (!e->ether)
 			/* Already added without ethernet address */
 			return IPSET_ADD_FAILED;
 		/* Fill the MAC address and trigger the timer activation */
-		memcpy(elem->ether, e->ether, ETH_ALEN);
+		clear_bit(e->id, map->members);
+		smp_mb__after_atomic();
+		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return IPSET_ADD_START_STORED_TIMEOUT;
 	} else if (e->ether) {
 		/* We can store MAC too */
-		memcpy(elem->ether, e->ether, ETH_ALEN);
+		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return 0;
-	} else {
-		elem->filled = MAC_UNSET;
-		/* MAC is not stored yet, don't start timer */
-		return IPSET_ADD_STORE_PLAIN_TIMEOUT;
 	}
+	elem->filled = MAC_UNSET;
+	/* MAC is not stored yet, don't start timer */
+	return IPSET_ADD_STORE_PLAIN_TIMEOUT;
 }
 
 static inline int
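Updating the stored MAC of a live element can no longer use a bare memcpy: the copy is not atomic, so a concurrent lockless reader could match against a half-written address. The hunk hides the element first, orders the clear against the copy, then writes the address; the common set_bit() in mtype_add() re-publishes the element once it is consistent again:

/* Hide -> update -> republish for a non-atomic field (from the hunk): */
clear_bit(e->id, map->members);		/* hide from lockless readers   */
smp_mb__after_atomic();			/* clear visible before copying */
ether_addr_copy(elem->ether, e->ether);	/* non-atomic copy, now safe    */
/* mtype_add() sets the bit again once the element is consistent */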
@@ -238,20 +247,17 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 	u32 ip = 0;
 	int ret = 0;
 
-	if (unlikely(!tb[IPSET_ATTR_IP] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
-		return -IPSET_ERR_PROTOCOL;
-
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	if (unlikely(!tb[IPSET_ATTR_IP]))
+		return -IPSET_ERR_PROTOCOL;
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -343,11 +349,12 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	} else if (tb[IPSET_ATTR_CIDR]) {
 		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
-		if (cidr >= 32)
+		if (cidr >= HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(first_ip, last_ip, cidr);
-	} else
+	} else {
 		return -IPSET_ERR_PROTOCOL;
+	}
 
 	elements = (u64)last_ip - first_ip + 1;
 
@@ -397,7 +404,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 	[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 	[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+				    .len  = IPSET_MAX_COMMENT_SIZE },
 	[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 	[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 	[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -414,6 +422,7 @@ bitmap_ipmac_init(void)
 static void __exit
 bitmap_ipmac_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&bitmap_ipmac_type);
 }
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 005dd36444c3..5338ccd5da46 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -73,7 +73,7 @@ static inline int
 bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
 		   struct bitmap_port *map, u32 flags, size_t dsize)
 {
-	return !!test_and_set_bit(e->id, map->members);
+	return !!test_bit(e->id, map->members);
 }
 
 static inline int
@@ -136,19 +136,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 	u16 port_to;
 	int ret = 0;
 
-	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
-		return -IPSET_ERR_PROTOCOL;
-
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+		return -IPSET_ERR_PROTOCOL;
+
 	port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
 	if (port < map->first_port || port > map->last_port)
 		return -IPSET_ERR_BITMAP_RANGE;
@@ -168,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 			if (port < map->first_port)
 				return -IPSET_ERR_BITMAP_RANGE;
 		}
-	} else
+	} else {
 		port_to = port;
+	}
 
 	if (port_to > map->last_port)
 		return -IPSET_ERR_BITMAP_RANGE;
@@ -180,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -294,7 +289,8 @@ static struct ip_set_type bitmap_port_type = {
 	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 	[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 	[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+	[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+				    .len  = IPSET_MAX_COMMENT_SIZE },
 	[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 	[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 	[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -311,6 +307,7 @@ bitmap_port_init(void)
 static void __exit
 bitmap_port_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&bitmap_port_type);
 }
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d259da3ce67a..338b4047776f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -32,8 +32,10 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
 struct ip_set_net {
 	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
 	ip_set_id_t	ip_set_max;	/* max number of sets */
-	int		is_deleted;	/* deleted by ip_set_net_exit */
+	bool		is_deleted;	/* deleted by ip_set_net_exit */
+	bool		is_destroyed;	/* all sets are destroyed */
 };
+
 static int ip_set_net_id __read_mostly;
 
 static inline struct ip_set_net *ip_set_pernet(struct net *net)
@@ -42,7 +44,7 @@ static inline struct ip_set_net *ip_set_pernet(struct net *net)
 }
 
 #define IP_SET_INC	64
-#define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
+#define STRNCMP(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
 
 static unsigned int max_sets;
 
@@ -59,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 #define ip_set(inst, id)	\
 	ip_set_dereference((inst)->ip_set_list)[id]
 
-/*
- * The set types are implemented in modules and registered set types
+/* The set types are implemented in modules and registered set types
  * can be found in ip_set_type_list. Adding/deleting types is
  * serialized by ip_set_type_mutex.
  */
@@ -85,7 +86,7 @@ find_set_type(const char *name, u8 family, u8 revision)
 	struct ip_set_type *type;
 
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
-		if (STREQ(type->name, name) &&
+		if (STRNCMP(type->name, name) &&
 		    (type->family == family ||
 		     type->family == NFPROTO_UNSPEC) &&
 		    revision >= type->revision_min &&
@@ -130,9 +131,10 @@ __find_set_type_get(const char *name, u8 family, u8 revision,
 		goto unlock;
 	}
 	/* Make sure the type is already loaded
-	 * but we don't support the revision */
+	 * but we don't support the revision
+	 */
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
-		if (STREQ(type->name, name)) {
+		if (STRNCMP(type->name, name)) {
 			err = -IPSET_ERR_FIND_TYPE;
 			goto unlock;
 		}
@@ -166,7 +168,7 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
 	*min = 255; *max = 0;
 	rcu_read_lock();
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
-		if (STREQ(type->name, name) &&
+		if (STRNCMP(type->name, name) &&
 		    (type->family == family ||
 		     type->family == NFPROTO_UNSPEC)) {
 			found = true;
@@ -208,15 +210,15 @@ ip_set_type_register(struct ip_set_type *type)
 		pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
 			type->name, family_name(type->family),
 			type->revision_min);
-		ret = -EINVAL;
-		goto unlock;
+		ip_set_type_unlock();
+		return -EINVAL;
 	}
 	list_add_rcu(&type->list, &ip_set_type_list);
 	pr_debug("type %s, family %s, revision %u:%u registered.\n",
 		 type->name, family_name(type->family),
 		 type->revision_min, type->revision_max);
-unlock:
 	ip_set_type_unlock();
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ip_set_type_register);
@@ -230,12 +232,12 @@ ip_set_type_unregister(struct ip_set_type *type)
 		pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
 			type->name, family_name(type->family),
 			type->revision_min);
-		goto unlock;
+		ip_set_type_unlock();
+		return;
 	}
 	list_del_rcu(&type->list);
 	pr_debug("type %s, family %s with revision min %u unregistered.\n",
 		 type->name, family_name(type->family), type->revision_min);
-unlock:
 	ip_set_type_unlock();
 
 	synchronize_rcu();
@@ -289,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
 int
 ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
 {
-	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
 
 	if (unlikely(!flag_nested(nla)))
 		return -IPSET_ERR_PROTOCOL;
@@ -306,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
 int
 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 {
-	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
 
 	if (unlikely(!flag_nested(nla)))
 		return -IPSET_ERR_PROTOCOL;
@@ -317,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 		return -IPSET_ERR_PROTOCOL;
 
 	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
-		sizeof(struct in6_addr));
+	       sizeof(struct in6_addr));
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
@@ -365,7 +367,7 @@ size_t
 ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
 {
 	enum ip_set_ext_id id;
-	size_t offset = 0;
+	size_t offset = len;
 	u32 cadt_flags = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
@@ -375,12 +377,12 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 		if (!add_extension(id, cadt_flags, tb))
 			continue;
-		offset += ALIGN(len + offset, ip_set_extensions[id].align);
+		offset = ALIGN(offset, ip_set_extensions[id].align);
 		set->offset[id] = offset;
 		set->extensions |= ip_set_extensions[id].type;
 		offset += ip_set_extensions[id].len;
 	}
-	return len + offset;
+	return offset;
 }
 EXPORT_SYMBOL_GPL(ip_set_elem_len);
 
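The element-size computation is fixed here: the old offset += ALIGN(len + offset, align) folded len into every iteration, inflating and misaligning the later extension offsets; now offset starts at len, is aligned per extension, and the final value is the element length. A worked example with illustrative numbers:

/* len = 20; ext A: align 8, len 16; ext B: align 4, len 4. */
size_t offset = len;		/* 20                               */
offset = ALIGN(offset, 8);	/* 24 -> set->offset[A]             */
offset += 16;			/* 40                               */
offset = ALIGN(offset, 4);	/* 40 -> set->offset[B]             */
offset += 4;			/* 44 = total element size returned */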
@@ -389,13 +391,22 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		      struct ip_set_ext *ext)
 {
 	u64 fullmark;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		return -IPSET_ERR_PROTOCOL;
+
 	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!(set->extensions & IPSET_EXT_TIMEOUT))
+		if (!SET_WITH_TIMEOUT(set))
 			return -IPSET_ERR_TIMEOUT;
 		ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 	if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
-		if (!(set->extensions & IPSET_EXT_COUNTER))
+		if (!SET_WITH_COUNTER(set))
 			return -IPSET_ERR_COUNTER;
 		if (tb[IPSET_ATTR_BYTES])
 			ext->bytes = be64_to_cpu(nla_get_be64(
@@ -405,25 +416,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				tb[IPSET_ATTR_PACKETS]));
 	}
 	if (tb[IPSET_ATTR_COMMENT]) {
-		if (!(set->extensions & IPSET_EXT_COMMENT))
+		if (!SET_WITH_COMMENT(set))
 			return -IPSET_ERR_COMMENT;
 		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
 	}
 	if (tb[IPSET_ATTR_SKBMARK]) {
-		if (!(set->extensions & IPSET_EXT_SKBINFO))
+		if (!SET_WITH_SKBINFO(set))
 			return -IPSET_ERR_SKBINFO;
 		fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
 		ext->skbmark = fullmark >> 32;
 		ext->skbmarkmask = fullmark & 0xffffffff;
 	}
 	if (tb[IPSET_ATTR_SKBPRIO]) {
-		if (!(set->extensions & IPSET_EXT_SKBINFO))
+		if (!SET_WITH_SKBINFO(set))
 			return -IPSET_ERR_SKBINFO;
 		ext->skbprio = be32_to_cpu(nla_get_be32(
 				tb[IPSET_ATTR_SKBPRIO]));
 	}
 	if (tb[IPSET_ATTR_SKBQUEUE]) {
-		if (!(set->extensions & IPSET_EXT_SKBINFO))
+		if (!SET_WITH_SKBINFO(set))
 			return -IPSET_ERR_SKBINFO;
 		ext->skbqueue = be16_to_cpu(nla_get_be16(
 				tb[IPSET_ATTR_SKBQUEUE]));
@@ -432,8 +443,32 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
 
-/*
- * Creating/destroying/renaming/swapping affect the existence and
+int
+ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+		      const void *e, bool active)
+{
+	if (SET_WITH_TIMEOUT(set)) {
+		unsigned long *timeout = ext_timeout(e, set);
+
+		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+				  htonl(active ? ip_set_timeout_get(timeout)
+					       : *timeout)))
+			return -EMSGSIZE;
+	}
+	if (SET_WITH_COUNTER(set) &&
+	    ip_set_put_counter(skb, ext_counter(e, set)))
+		return -EMSGSIZE;
+	if (SET_WITH_COMMENT(set) &&
+	    ip_set_put_comment(skb, ext_comment(e, set)))
+		return -EMSGSIZE;
+	if (SET_WITH_SKBINFO(set) &&
+	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+		return -EMSGSIZE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+
+/* Creating/destroying/renaming/swapping affect the existence and
  * the properties of a set. All of these can be executed from userspace
  * only and serialized by the nfnl mutex indirectly from nfnetlink.
  *
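The new exported ip_set_put_extensions() gives every set type one helper for dumping per-element extensions over netlink; the active flag chooses between reporting the remaining timeout (ip_set_timeout_get()) and the raw stored value. Its use is already visible in the mtype_list() hunk of ip_set_bitmap_gen.h above:

/* In a type's ->list routine ('x' is the element's extension area): */
if (ip_set_put_extensions(skb, set, x,
			  mtype_is_filled((const struct mtype_elem *)x)))
	goto nla_put_failure;	/* -EMSGSIZE: netlink buffer is full */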
@@ -460,8 +495,7 @@ __ip_set_put(struct ip_set *set)
 	write_unlock_bh(&ip_set_ref_lock);
 }
 
-/*
- * Add, del and test set entries from kernel.
+/* Add, del and test set entries from kernel.
  *
  * The set behind the index must exist and must be referenced
  * so it can't be destroyed (or changed) under our foot.
@@ -489,23 +523,23 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 		dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (opt->dim < set->type->dimension ||
 	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 		return 0;
 
-	read_lock_bh(&set->lock);
+	rcu_read_lock_bh();
 	ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
-	read_unlock_bh(&set->lock);
+	rcu_read_unlock_bh();
 
 	if (ret == -EAGAIN) {
 		/* Type requests element to be completed */
 		pr_debug("element must be completed, ADD is triggered\n");
-		write_lock_bh(&set->lock);
+		spin_lock_bh(&set->lock);
 		set->variant->kadt(set, skb, par, IPSET_ADD, opt);
-		write_unlock_bh(&set->lock);
+		spin_unlock_bh(&set->lock);
 		ret = 1;
 	} else {
 		/* --return-nomatch: invert matched element */
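ip_set_test() is the packet-path reader: it now runs the TEST lookup under rcu_read_lock_bh() instead of the old per-set rwlock, and only escalates to the spinlock when the type asks to complete an element (-EAGAIN, e.g. bitmap:ip,mac filling in a source MAC). The reader/writer split in short:

/* Reader/writer split in ip_set_test() after the conversion: */
rcu_read_lock_bh();				/* lockless lookup     */
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
rcu_read_unlock_bh();

if (ret == -EAGAIN) {				/* must mutate: writer */
	spin_lock_bh(&set->lock);
	set->variant->kadt(set, skb, par, IPSET_ADD, opt);
	spin_unlock_bh(&set->lock);
}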
@@ -528,16 +562,16 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 		dev_net(par->in ? par->in : par->out), index);
 	int ret;
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (opt->dim < set->type->dimension ||
 	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
-	write_lock_bh(&set->lock);
+	spin_lock_bh(&set->lock);
 	ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
-	write_unlock_bh(&set->lock);
+	spin_unlock_bh(&set->lock);
 
 	return ret;
 }
@@ -551,23 +585,22 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 		dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (opt->dim < set->type->dimension ||
 	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
-	write_lock_bh(&set->lock);
+	spin_lock_bh(&set->lock);
 	ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
-	write_unlock_bh(&set->lock);
+	spin_unlock_bh(&set->lock);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ip_set_del);
 
-/*
- * Find set by name, reference it once. The reference makes sure the
+/* Find set by name, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.
  *
  */
@@ -581,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 	rcu_read_lock();
 	for (i = 0; i < inst->ip_set_max; i++) {
 		s = rcu_dereference(inst->ip_set_list)[i];
-		if (s != NULL && STREQ(s->name, name)) {
+		if (s && STRNCMP(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
 			*set = s;
@@ -594,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_byname);
 
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
  * reference count by 1. The caller shall not assume the index
  * to be valid, after calling this function.
  *
@@ -608,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 
 	rcu_read_lock();
 	set = rcu_dereference(inst->ip_set_list)[index];
-	if (set != NULL)
+	if (set)
 		__ip_set_put(set);
 	rcu_read_unlock();
 }
@@ -622,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
 }
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
-/*
- * Get the name of a set behind a set index.
+/* Get the name of a set behind a set index.
  * We assume the set is referenced, so it does exist and
 * can't be destroyed. The set cannot be renamed due to
 * the referencing either.
@@ -634,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
 {
 	const struct ip_set *set = ip_set_rcu_get(net, index);
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	BUG_ON(set->ref == 0);
 
 	/* Referenced, so it's safe */
@@ -642,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
 }
 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 
-/*
- * Routines to call by external subsystems, which do not
+/* Routines to call by external subsystems, which do not
  * call nfnl_lock for us.
  */
 
-/*
- * Find set by index, reference it once. The reference makes sure the
+/* Find set by index, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.
  *
  * The nfnl mutex is used in the function.
@@ -674,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
 
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
  * reference count by 1. The caller shall not assume the index
  * to be valid, after calling this function.
  *
@@ -690,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
 	nfnl_lock(NFNL_SUBSYS_IPSET);
 	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
 		set = ip_set(inst, index);
-		if (set != NULL)
+		if (set)
 			__ip_set_put(set);
 	}
 	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 
-/*
- * Communication protocol with userspace over netlink.
+/* Communication protocol with userspace over netlink.
  *
  * The commands are serialized by the nfnl mutex.
  */
@@ -725,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 
 	nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
 			sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	if (!nlh)
 		return NULL;
 
 	nfmsg = nlmsg_data(nlh);
@@ -758,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
 	*id = IPSET_INVALID_ID;
 	for (i = 0; i < inst->ip_set_max; i++) {
 		set = ip_set(inst, i);
-		if (set != NULL && STREQ(set->name, name)) {
+		if (set && STRNCMP(set->name, name)) {
 			*id = i;
 			break;
 		}
@@ -784,10 +811,10 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
 	*index = IPSET_INVALID_ID;
 	for (i = 0; i < inst->ip_set_max; i++) {
 		s = ip_set(inst, i);
-		if (s == NULL) {
+		if (!s) {
 			if (*index == IPSET_INVALID_ID)
 				*index = i;
-		} else if (STREQ(name, s->name)) {
+		} else if (STRNCMP(name, s->name)) {
 			/* Name clash */
 			*set = s;
 			return -EEXIST;
@@ -816,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
-	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
+	struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
 	const char *name, *typename;
 	u8 family, revision;
 	u32 flags = flag_exist(nlh);
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
-		     attr[IPSET_ATTR_TYPENAME] == NULL ||
-		     attr[IPSET_ATTR_REVISION] == NULL ||
-		     attr[IPSET_ATTR_FAMILY] == NULL ||
-		     (attr[IPSET_ATTR_DATA] != NULL &&
+		     !attr[IPSET_ATTR_SETNAME] ||
+		     !attr[IPSET_ATTR_TYPENAME] ||
+		     !attr[IPSET_ATTR_REVISION] ||
+		     !attr[IPSET_ATTR_FAMILY] ||
+		     (attr[IPSET_ATTR_DATA] &&
 		      !flag_nested(attr[IPSET_ATTR_DATA]))))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -838,33 +865,29 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
 		 name, typename, family_name(family), revision);
 
-	/*
-	 * First, and without any locks, allocate and initialize
+	/* First, and without any locks, allocate and initialize
 	 * a normal base set structure.
 	 */
-	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
+	set = kzalloc(sizeof(*set), GFP_KERNEL);
 	if (!set)
 		return -ENOMEM;
-	rwlock_init(&set->lock);
+	spin_lock_init(&set->lock);
 	strlcpy(set->name, name, IPSET_MAXNAMELEN);
 	set->family = family;
 	set->revision = revision;
 
-	/*
-	 * Next, check that we know the type, and take
+	/* Next, check that we know the type, and take
 	 * a reference on the type, to make sure it stays available
 	 * while constructing our new set.
 	 *
 	 * After referencing the type, we try to create the type
 	 * specific part of the set without holding any locks.
 	 */
-	ret = find_set_type_get(typename, family, revision, &(set->type));
+	ret = find_set_type_get(typename, family, revision, &set->type);
 	if (ret)
 		goto out;
 
-	/*
-	 * Without holding any locks, create private part.
-	 */
+	/* Without holding any locks, create private part. */
 	if (attr[IPSET_ATTR_DATA] &&
 	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
 			     set->type->create_policy)) {
@@ -878,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 
 	/* BTW, ret==0 here. */
 
-	/*
-	 * Here, we have a valid, constructed set and we are protected
+	/* Here, we have a valid, constructed set and we are protected
 	 * by the nfnl mutex. Find the first free index in ip_set_list
 	 * and check clashing.
 	 */
@@ -887,7 +909,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	if (ret == -EEXIST) {
 		/* If this is the same set and requested, ignore error */
 		if ((flags & IPSET_FLAG_EXIST) &&
-		    STREQ(set->type->name, clash->type->name) &&
+		    STRNCMP(set->type->name, clash->type->name) &&
 		    set->type->family == clash->type->family &&
 		    set->type->revision_min == clash->type->revision_min &&
 		    set->type->revision_max == clash->type->revision_max &&
@@ -902,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 			/* Wraparound */
 			goto cleanup;
 
-		list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
+		list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
 		if (!list)
 			goto cleanup;
 		/* nfnl mutex is held, both lists are valid */
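kcalloc() replaces the open-coded kzalloc(size * n): the helper checks the multiplication for overflow and returns NULL instead of allocating a truncated buffer, which matters because i grows with the user-configurable set count. Usage as in the hunk:

list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);	/* zeroed array */
if (!list)					/* NULL on OOM or overflow */
	goto cleanup;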
@@ -916,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		inst->ip_set_max = i;
 		kfree(tmp);
 		ret = 0;
-	} else if (ret)
+	} else if (ret) {
 		goto cleanup;
+	}
 
-	/*
-	 * Finally! Add our shiny new set to the list, and be done.
-	 */
+	/* Finally! Add our shiny new set to the list, and be done. */
 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
 	ip_set(inst, index) = set;
 
@@ -946,12 +967,9 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static void
-ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
+ip_set_destroy_set(struct ip_set *set)
 {
-	struct ip_set *set = ip_set(inst, index);
-
 	pr_debug("set: %s\n", set->name);
-	ip_set(inst, index) = NULL;
 
 	/* Must call it without holding any lock */
 	set->variant->destroy(set);
@@ -986,30 +1004,36 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < inst->ip_set_max; i++) {
 			s = ip_set(inst, i);
-			if (s != NULL && s->ref) {
+			if (s && s->ref) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
 		}
+		inst->is_destroyed = true;
 		read_unlock_bh(&ip_set_ref_lock);
 		for (i = 0; i < inst->ip_set_max; i++) {
 			s = ip_set(inst, i);
-			if (s != NULL)
-				ip_set_destroy_set(inst, i);
+			if (s) {
+				ip_set(inst, i) = NULL;
+				ip_set_destroy_set(s);
+			}
 		}
+		/* Modified by ip_set_destroy() only, which is serialized */
+		inst->is_destroyed = false;
 	} else {
 		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
 				    &i);
-		if (s == NULL) {
+		if (!s) {
 			ret = -ENOENT;
 			goto out;
 		} else if (s->ref) {
 			ret = -IPSET_ERR_BUSY;
 			goto out;
 		}
+		ip_set(inst, i) = NULL;
 		read_unlock_bh(&ip_set_ref_lock);
 
-		ip_set_destroy_set(inst, i);
+		ip_set_destroy_set(s);
 	}
 	return 0;
 out:
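Destruction is reordered so lookups cannot find a set that is being torn down: the slot in ip_set_list is cleared (for the named case, still under ip_set_ref_lock) before ip_set_destroy_set() runs lock-free, and a flush-all is bracketed by inst->is_destroyed so other netlink operations can detect the transient state (the matching check belongs with the is_destroyed bool added to ip_set_dump_start() below). The named-set order:

/* Unlink under the lock, destroy outside it (from the hunk above): */
ip_set(inst, i) = NULL;		/* no lookup can return it now    */
read_unlock_bh(&ip_set_ref_lock);
ip_set_destroy_set(s);		/* type destructor, no locks held */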
@@ -1024,9 +1048,9 @@ ip_set_flush_set(struct ip_set *set)
1024{ 1048{
1025 pr_debug("set: %s\n", set->name); 1049 pr_debug("set: %s\n", set->name);
1026 1050
1027 write_lock_bh(&set->lock); 1051 spin_lock_bh(&set->lock);
1028 set->variant->flush(set); 1052 set->variant->flush(set);
1029 write_unlock_bh(&set->lock); 1053 spin_unlock_bh(&set->lock);
1030} 1054}
1031 1055
1032static int 1056static int
@@ -1044,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
1044 if (!attr[IPSET_ATTR_SETNAME]) { 1068 if (!attr[IPSET_ATTR_SETNAME]) {
1045 for (i = 0; i < inst->ip_set_max; i++) { 1069 for (i = 0; i < inst->ip_set_max; i++) {
1046 s = ip_set(inst, i); 1070 s = ip_set(inst, i);
1047 if (s != NULL) 1071 if (s)
1048 ip_set_flush_set(s); 1072 ip_set_flush_set(s);
1049 } 1073 }
1050 } else { 1074 } else {
1051 s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1075 s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1052 if (s == NULL) 1076 if (!s)
1053 return -ENOENT; 1077 return -ENOENT;
1054 1078
1055 ip_set_flush_set(s); 1079 ip_set_flush_set(s);
@@ -1081,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
1081 int ret = 0; 1105 int ret = 0;
1082 1106
1083 if (unlikely(protocol_failed(attr) || 1107 if (unlikely(protocol_failed(attr) ||
1084 attr[IPSET_ATTR_SETNAME] == NULL || 1108 !attr[IPSET_ATTR_SETNAME] ||
1085 attr[IPSET_ATTR_SETNAME2] == NULL)) 1109 !attr[IPSET_ATTR_SETNAME2]))
1086 return -IPSET_ERR_PROTOCOL; 1110 return -IPSET_ERR_PROTOCOL;
1087 1111
1088 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1112 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1089 if (set == NULL) 1113 if (!set)
1090 return -ENOENT; 1114 return -ENOENT;
1091 1115
1092 read_lock_bh(&ip_set_ref_lock); 1116 read_lock_bh(&ip_set_ref_lock);
@@ -1098,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
1098 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); 1122 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1099 for (i = 0; i < inst->ip_set_max; i++) { 1123 for (i = 0; i < inst->ip_set_max; i++) {
1100 s = ip_set(inst, i); 1124 s = ip_set(inst, i);
1101 if (s != NULL && STREQ(s->name, name2)) { 1125 if (s && STRNCMP(s->name, name2)) {
1102 ret = -IPSET_ERR_EXIST_SETNAME2; 1126 ret = -IPSET_ERR_EXIST_SETNAME2;
1103 goto out; 1127 goto out;
1104 } 1128 }
@@ -1130,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1130 char from_name[IPSET_MAXNAMELEN]; 1154 char from_name[IPSET_MAXNAMELEN];
1131 1155
1132 if (unlikely(protocol_failed(attr) || 1156 if (unlikely(protocol_failed(attr) ||
1133 attr[IPSET_ATTR_SETNAME] == NULL || 1157 !attr[IPSET_ATTR_SETNAME] ||
1134 attr[IPSET_ATTR_SETNAME2] == NULL)) 1158 !attr[IPSET_ATTR_SETNAME2]))
1135 return -IPSET_ERR_PROTOCOL; 1159 return -IPSET_ERR_PROTOCOL;
1136 1160
1137 from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), 1161 from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1138 &from_id); 1162 &from_id);
1139 if (from == NULL) 1163 if (!from)
1140 return -ENOENT; 1164 return -ENOENT;
1141 1165
1142 to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), 1166 to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
1143 &to_id); 1167 &to_id);
1144 if (to == NULL) 1168 if (!to)
1145 return -IPSET_ERR_EXIST_SETNAME2; 1169 return -IPSET_ERR_EXIST_SETNAME2;
1146 1170
1147 /* Features must not change. 1171 /* Features must not change.
1148 * Not an artificial restriction anymore, as we must prevent 1172 * Not an artifical restriction anymore, as we must prevent
1149 * possible loops created by swapping in setlist type of sets. */ 1173 * possible loops created by swapping in setlist type of sets.
1174 */
1150 if (!(from->type->features == to->type->features && 1175 if (!(from->type->features == to->type->features &&
1151 from->family == to->family)) 1176 from->family == to->family))
1152 return -IPSET_ERR_TYPE_MISMATCH; 1177 return -IPSET_ERR_TYPE_MISMATCH;
@@ -1177,12 +1202,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1177static int 1202static int
1178ip_set_dump_done(struct netlink_callback *cb) 1203ip_set_dump_done(struct netlink_callback *cb)
1179{ 1204{
1180 struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
1181 if (cb->args[IPSET_CB_ARG0]) { 1205 if (cb->args[IPSET_CB_ARG0]) {
1182 pr_debug("release set %s\n", 1206 struct ip_set_net *inst =
1183 ip_set(inst, cb->args[IPSET_CB_INDEX])->name); 1207 (struct ip_set_net *)cb->args[IPSET_CB_NET];
1184 __ip_set_put_byindex(inst, 1208 ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1185 (ip_set_id_t) cb->args[IPSET_CB_INDEX]); 1209 struct ip_set *set = ip_set(inst, index);
1210
1211 if (set->variant->uref)
1212 set->variant->uref(set, cb, false);
1213 pr_debug("release set %s\n", set->name);
1214 __ip_set_put_byindex(inst, index);
1186 } 1215 }
1187 return 0; 1216 return 0;
1188} 1217}
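
As a rough model of the new uref() hook (assumed names, not the real API surface): the callback is optional per set variant, so every call site NULL-checks it before taking (true) or dropping (false) a dump-time reference on the variant's private data.

#include <stdbool.h>
#include <stddef.h>

/* Illustrative only: optional per-variant dump-reference hook. */
struct toy_variant {
        void (*uref)(void *set, void *cb, bool start);
};

static void toy_dump_done(const struct toy_variant *v, void *set, void *cb)
{
        if (v->uref)                     /* hook is optional */
                v->uref(set, cb, false); /* false: drop the reference */
}
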
@@ -1204,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
1204{ 1233{
1205 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); 1234 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1206 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 1235 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1207 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1236 struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1208 struct nlattr *attr = (void *)nlh + min_len; 1237 struct nlattr *attr = (void *)nlh + min_len;
1209 u32 dump_type; 1238 u32 dump_type;
1210 ip_set_id_t index; 1239 ip_set_id_t index;
@@ -1213,27 +1242,23 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
1213 nla_parse(cda, IPSET_ATTR_CMD_MAX, 1242 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1214 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); 1243 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1215 1244
1216 /* cb->args[IPSET_CB_NET]: net namespace
1217 * [IPSET_CB_DUMP]: dump single set/all sets
1218 * [IPSET_CB_INDEX]: set index
1219 * [IPSET_CB_ARG0]: type specific
1220 */
1221
1222 if (cda[IPSET_ATTR_SETNAME]) { 1245 if (cda[IPSET_ATTR_SETNAME]) {
1223 struct ip_set *set; 1246 struct ip_set *set;
1224 1247
1225 set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), 1248 set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
1226 &index); 1249 &index);
1227 if (set == NULL) 1250 if (!set)
1228 return -ENOENT; 1251 return -ENOENT;
1229 1252
1230 dump_type = DUMP_ONE; 1253 dump_type = DUMP_ONE;
1231 cb->args[IPSET_CB_INDEX] = index; 1254 cb->args[IPSET_CB_INDEX] = index;
1232 } else 1255 } else {
1233 dump_type = DUMP_ALL; 1256 dump_type = DUMP_ALL;
1257 }
1234 1258
1235 if (cda[IPSET_ATTR_FLAGS]) { 1259 if (cda[IPSET_ATTR_FLAGS]) {
1236 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); 1260 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1261
1237 dump_type |= (f << 16); 1262 dump_type |= (f << 16);
1238 } 1263 }
1239 cb->args[IPSET_CB_NET] = (unsigned long)inst; 1264 cb->args[IPSET_CB_NET] = (unsigned long)inst;
@@ -1251,6 +1276,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1251 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; 1276 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
1252 struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); 1277 struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
1253 u32 dump_type, dump_flags; 1278 u32 dump_type, dump_flags;
1279 bool is_destroyed;
1254 int ret = 0; 1280 int ret = 0;
1255 1281
1256 if (!cb->args[IPSET_CB_DUMP]) { 1282 if (!cb->args[IPSET_CB_DUMP]) {
@@ -1258,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1258 if (ret < 0) { 1284 if (ret < 0) {
1259 nlh = nlmsg_hdr(cb->skb); 1285 nlh = nlmsg_hdr(cb->skb);
1260 /* We have to create and send the error message 1286 /* We have to create and send the error message
1261 * manually :-( */ 1287 * manually :-(
1288 */
1262 if (nlh->nlmsg_flags & NLM_F_ACK) 1289 if (nlh->nlmsg_flags & NLM_F_ACK)
1263 netlink_ack(cb->skb, nlh, ret); 1290 netlink_ack(cb->skb, nlh, ret);
1264 return ret; 1291 return ret;
@@ -1276,13 +1303,21 @@ dump_last:
1276 pr_debug("dump type, flag: %u %u index: %ld\n", 1303 pr_debug("dump type, flag: %u %u index: %ld\n",
1277 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); 1304 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
1278 for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { 1305 for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
1279 index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; 1306 index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1307 write_lock_bh(&ip_set_ref_lock);
1280 set = ip_set(inst, index); 1308 set = ip_set(inst, index);
1281 if (set == NULL) { 1309 is_destroyed = inst->is_destroyed;
1310 if (!set || is_destroyed) {
1311 write_unlock_bh(&ip_set_ref_lock);
1282 if (dump_type == DUMP_ONE) { 1312 if (dump_type == DUMP_ONE) {
1283 ret = -ENOENT; 1313 ret = -ENOENT;
1284 goto out; 1314 goto out;
1285 } 1315 }
1316 if (is_destroyed) {
1317 /* All sets are just being destroyed */
1318 ret = 0;
1319 goto out;
1320 }
1286 continue; 1321 continue;
1287 } 1322 }
1288 /* When dumping all sets, we must dump "sorted" 1323 /* When dumping all sets, we must dump "sorted"
@@ -1290,14 +1325,17 @@ dump_last:
1290 */ 1325 */
1291 if (dump_type != DUMP_ONE && 1326 if (dump_type != DUMP_ONE &&
1292 ((dump_type == DUMP_ALL) == 1327 ((dump_type == DUMP_ALL) ==
1293 !!(set->type->features & IPSET_DUMP_LAST))) 1328 !!(set->type->features & IPSET_DUMP_LAST))) {
1329 write_unlock_bh(&ip_set_ref_lock);
1294 continue; 1330 continue;
1331 }
1295 pr_debug("List set: %s\n", set->name); 1332 pr_debug("List set: %s\n", set->name);
1296 if (!cb->args[IPSET_CB_ARG0]) { 1333 if (!cb->args[IPSET_CB_ARG0]) {
1297 /* Start listing: make sure set won't be destroyed */ 1334 /* Start listing: make sure set won't be destroyed */
1298 pr_debug("reference set\n"); 1335 pr_debug("reference set\n");
1299 __ip_set_get(set); 1336 set->ref++;
1300 } 1337 }
1338 write_unlock_bh(&ip_set_ref_lock);
1301 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, 1339 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
1302 cb->nlh->nlmsg_seq, flags, 1340 cb->nlh->nlmsg_seq, flags,
1303 IPSET_CMD_LIST); 1341 IPSET_CMD_LIST);
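
The pattern above (checking is_destroyed and bumping set->ref while holding ip_set_ref_lock for writing) can be sketched in userspace terms roughly as follows; the lock and field names below are illustrative stand-ins, not the kernel's:

#include <pthread.h>
#include <stdbool.h>

struct toy_set {
        pthread_rwlock_t lock;  /* stand-in for ip_set_ref_lock */
        unsigned int ref;
        bool dying;             /* stand-in for inst->is_destroyed */
};

/* Take a dump reference only if the set is still alive; destroy
 * takes the same lock, so the check and the increment cannot race
 * against it. */
static bool toy_dump_get(struct toy_set *s)
{
        bool ok;

        pthread_rwlock_wrlock(&s->lock);
        ok = !s->dying;
        if (ok)
                s->ref++;
        pthread_rwlock_unlock(&s->lock);
        return ok;
}
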
@@ -1325,11 +1363,13 @@ dump_last:
1325 goto release_refcount; 1363 goto release_refcount;
1326 if (dump_flags & IPSET_FLAG_LIST_HEADER) 1364 if (dump_flags & IPSET_FLAG_LIST_HEADER)
1327 goto next_set; 1365 goto next_set;
1366 if (set->variant->uref)
1367 set->variant->uref(set, cb, true);
1328 /* Fall through and add elements */ 1368 /* Fall through and add elements */
1329 default: 1369 default:
1330 read_lock_bh(&set->lock); 1370 rcu_read_lock_bh();
1331 ret = set->variant->list(set, skb, cb); 1371 ret = set->variant->list(set, skb, cb);
1332 read_unlock_bh(&set->lock); 1372 rcu_read_unlock_bh();
1333 if (!cb->args[IPSET_CB_ARG0]) 1373 if (!cb->args[IPSET_CB_ARG0])
1334 /* Set is done, proceed with next one */ 1374 /* Set is done, proceed with next one */
1335 goto next_set; 1375 goto next_set;
@@ -1341,6 +1381,8 @@ dump_last:
1341 dump_type = DUMP_LAST; 1381 dump_type = DUMP_LAST;
1342 cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); 1382 cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
1343 cb->args[IPSET_CB_INDEX] = 0; 1383 cb->args[IPSET_CB_INDEX] = 0;
1384 if (set && set->variant->uref)
1385 set->variant->uref(set, cb, false);
1344 goto dump_last; 1386 goto dump_last;
1345 } 1387 }
1346 goto out; 1388 goto out;
@@ -1355,7 +1397,10 @@ next_set:
1355release_refcount: 1397release_refcount:
1356 /* If there was an error or set is done, release set */ 1398 /* If there was an error or set is done, release set */
1357 if (ret || !cb->args[IPSET_CB_ARG0]) { 1399 if (ret || !cb->args[IPSET_CB_ARG0]) {
1358 pr_debug("release set %s\n", ip_set(inst, index)->name); 1400 set = ip_set(inst, index);
1401 if (set->variant->uref)
1402 set->variant->uref(set, cb, false);
1403 pr_debug("release set %s\n", set->name);
1359 __ip_set_put_byindex(inst, index); 1404 __ip_set_put_byindex(inst, index);
1360 cb->args[IPSET_CB_ARG0] = 0; 1405 cb->args[IPSET_CB_ARG0] = 0;
1361 } 1406 }
@@ -1407,9 +1452,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1407 bool eexist = flags & IPSET_FLAG_EXIST, retried = false; 1452 bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1408 1453
1409 do { 1454 do {
1410 write_lock_bh(&set->lock); 1455 spin_lock_bh(&set->lock);
1411 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); 1456 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1412 write_unlock_bh(&set->lock); 1457 spin_unlock_bh(&set->lock);
1413 retried = true; 1458 retried = true;
1414 } while (ret == -EAGAIN && 1459 } while (ret == -EAGAIN &&
1415 set->variant->resize && 1460 set->variant->resize &&
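
The add/delete retry contract above reduces to: on -EAGAIN from the bucket layer, resize the hash (if the type supports it) and run the operation once more. A compact sketch, with toy stand-ins for the callbacks:

#include <stdbool.h>

#define TOY_EAGAIN 11   /* stand-in for the kernel's EAGAIN */

static int toy_call_ad(int (*uadt)(bool retried), int (*resize)(bool))
{
        bool retried = false;
        int ret;

        do {
                ret = uadt(retried);
                retried = true;
        } while (ret == -TOY_EAGAIN && resize && resize(retried) == 0);

        return ret;
}
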
@@ -1425,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1425 size_t payload = min(SIZE_MAX, 1470 size_t payload = min(SIZE_MAX,
1426 sizeof(*errmsg) + nlmsg_len(nlh)); 1471 sizeof(*errmsg) + nlmsg_len(nlh));
1427 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 1472 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1428 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1473 struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1429 struct nlattr *cmdattr; 1474 struct nlattr *cmdattr;
1430 u32 *errline; 1475 u32 *errline;
1431 1476
1432 skb2 = nlmsg_new(payload, GFP_KERNEL); 1477 skb2 = nlmsg_new(payload, GFP_KERNEL);
1433 if (skb2 == NULL) 1478 if (!skb2)
1434 return -ENOMEM; 1479 return -ENOMEM;
1435 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, 1480 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
1436 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); 1481 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
@@ -1447,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1447 1492
1448 *errline = lineno; 1493 *errline = lineno;
1449 1494
1450 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); 1495 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
1496 MSG_DONTWAIT);
1451 /* Signal netlink not to send its ACK/errmsg. */ 1497 /* Signal netlink not to send its ACK/errmsg. */
1452 return -EINTR; 1498 return -EINTR;
1453 } 1499 }
@@ -1462,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1462{ 1508{
1463 struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); 1509 struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1464 struct ip_set *set; 1510 struct ip_set *set;
1465 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1511 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1466 const struct nlattr *nla; 1512 const struct nlattr *nla;
1467 u32 flags = flag_exist(nlh); 1513 u32 flags = flag_exist(nlh);
1468 bool use_lineno; 1514 bool use_lineno;
1469 int ret = 0; 1515 int ret = 0;
1470 1516
1471 if (unlikely(protocol_failed(attr) || 1517 if (unlikely(protocol_failed(attr) ||
1472 attr[IPSET_ATTR_SETNAME] == NULL || 1518 !attr[IPSET_ATTR_SETNAME] ||
1473 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1519 !((attr[IPSET_ATTR_DATA] != NULL) ^
1474 (attr[IPSET_ATTR_ADT] != NULL)) || 1520 (attr[IPSET_ATTR_ADT] != NULL)) ||
1475 (attr[IPSET_ATTR_DATA] != NULL && 1521 (attr[IPSET_ATTR_DATA] &&
1476 !flag_nested(attr[IPSET_ATTR_DATA])) || 1522 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1477 (attr[IPSET_ATTR_ADT] != NULL && 1523 (attr[IPSET_ATTR_ADT] &&
1478 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1524 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1479 attr[IPSET_ATTR_LINENO] == NULL)))) 1525 !attr[IPSET_ATTR_LINENO]))))
1480 return -IPSET_ERR_PROTOCOL; 1526 return -IPSET_ERR_PROTOCOL;
1481 1527
1482 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1528 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1483 if (set == NULL) 1529 if (!set)
1484 return -ENOENT; 1530 return -ENOENT;
1485 1531
1486 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1532 use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1517,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1517{ 1563{
1518 struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); 1564 struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1519 struct ip_set *set; 1565 struct ip_set *set;
1520 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1566 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1521 const struct nlattr *nla; 1567 const struct nlattr *nla;
1522 u32 flags = flag_exist(nlh); 1568 u32 flags = flag_exist(nlh);
1523 bool use_lineno; 1569 bool use_lineno;
1524 int ret = 0; 1570 int ret = 0;
1525 1571
1526 if (unlikely(protocol_failed(attr) || 1572 if (unlikely(protocol_failed(attr) ||
1527 attr[IPSET_ATTR_SETNAME] == NULL || 1573 !attr[IPSET_ATTR_SETNAME] ||
1528 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1574 !((attr[IPSET_ATTR_DATA] != NULL) ^
1529 (attr[IPSET_ATTR_ADT] != NULL)) || 1575 (attr[IPSET_ATTR_ADT] != NULL)) ||
1530 (attr[IPSET_ATTR_DATA] != NULL && 1576 (attr[IPSET_ATTR_DATA] &&
1531 !flag_nested(attr[IPSET_ATTR_DATA])) || 1577 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1532 (attr[IPSET_ATTR_ADT] != NULL && 1578 (attr[IPSET_ATTR_ADT] &&
1533 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1579 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1534 attr[IPSET_ATTR_LINENO] == NULL)))) 1580 !attr[IPSET_ATTR_LINENO]))))
1535 return -IPSET_ERR_PROTOCOL; 1581 return -IPSET_ERR_PROTOCOL;
1536 1582
1537 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1583 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1538 if (set == NULL) 1584 if (!set)
1539 return -ENOENT; 1585 return -ENOENT;
1540 1586
1541 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1587 use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1572,26 +1618,26 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1572{ 1618{
1573 struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); 1619 struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1574 struct ip_set *set; 1620 struct ip_set *set;
1575 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1621 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1576 int ret = 0; 1622 int ret = 0;
1577 1623
1578 if (unlikely(protocol_failed(attr) || 1624 if (unlikely(protocol_failed(attr) ||
1579 attr[IPSET_ATTR_SETNAME] == NULL || 1625 !attr[IPSET_ATTR_SETNAME] ||
1580 attr[IPSET_ATTR_DATA] == NULL || 1626 !attr[IPSET_ATTR_DATA] ||
1581 !flag_nested(attr[IPSET_ATTR_DATA]))) 1627 !flag_nested(attr[IPSET_ATTR_DATA])))
1582 return -IPSET_ERR_PROTOCOL; 1628 return -IPSET_ERR_PROTOCOL;
1583 1629
1584 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1630 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1585 if (set == NULL) 1631 if (!set)
1586 return -ENOENT; 1632 return -ENOENT;
1587 1633
1588 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], 1634 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1589 set->type->adt_policy)) 1635 set->type->adt_policy))
1590 return -IPSET_ERR_PROTOCOL; 1636 return -IPSET_ERR_PROTOCOL;
1591 1637
1592 read_lock_bh(&set->lock); 1638 rcu_read_lock_bh();
1593 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); 1639 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
1594 read_unlock_bh(&set->lock); 1640 rcu_read_unlock_bh();
1595 /* Userspace can't trigger element to be re-added */ 1641 /* Userspace can't trigger element to be re-added */
1596 if (ret == -EAGAIN) 1642 if (ret == -EAGAIN)
1597 ret = 1; 1643 ret = 1;
@@ -1613,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1613 int ret = 0; 1659 int ret = 0;
1614 1660
1615 if (unlikely(protocol_failed(attr) || 1661 if (unlikely(protocol_failed(attr) ||
1616 attr[IPSET_ATTR_SETNAME] == NULL)) 1662 !attr[IPSET_ATTR_SETNAME]))
1617 return -IPSET_ERR_PROTOCOL; 1663 return -IPSET_ERR_PROTOCOL;
1618 1664
1619 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1665 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1620 if (set == NULL) 1666 if (!set)
1621 return -ENOENT; 1667 return -ENOENT;
1622 1668
1623 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1669 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1624 if (skb2 == NULL) 1670 if (!skb2)
1625 return -ENOMEM; 1671 return -ENOMEM;
1626 1672
1627 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 1673 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1670,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1670 int ret = 0; 1716 int ret = 0;
1671 1717
1672 if (unlikely(protocol_failed(attr) || 1718 if (unlikely(protocol_failed(attr) ||
1673 attr[IPSET_ATTR_TYPENAME] == NULL || 1719 !attr[IPSET_ATTR_TYPENAME] ||
1674 attr[IPSET_ATTR_FAMILY] == NULL)) 1720 !attr[IPSET_ATTR_FAMILY]))
1675 return -IPSET_ERR_PROTOCOL; 1721 return -IPSET_ERR_PROTOCOL;
1676 1722
1677 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 1723 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
@@ -1681,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1681 return ret; 1727 return ret;
1682 1728
1683 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1729 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1684 if (skb2 == NULL) 1730 if (!skb2)
1685 return -ENOMEM; 1731 return -ENOMEM;
1686 1732
1687 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 1733 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1726,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1726 struct nlmsghdr *nlh2; 1772 struct nlmsghdr *nlh2;
1727 int ret = 0; 1773 int ret = 0;
1728 1774
1729 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) 1775 if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
1730 return -IPSET_ERR_PROTOCOL; 1776 return -IPSET_ERR_PROTOCOL;
1731 1777
1732 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1778 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1733 if (skb2 == NULL) 1779 if (!skb2)
1734 return -ENOMEM; 1780 return -ENOMEM;
1735 1781
1736 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 1782 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1858,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1858 ret = -EFAULT; 1904 ret = -EFAULT;
1859 goto done; 1905 goto done;
1860 } 1906 }
1861 op = (unsigned int *) data; 1907 op = (unsigned int *)data;
1862 1908
1863 if (*op < IP_SET_OP_VERSION) { 1909 if (*op < IP_SET_OP_VERSION) {
1864 /* Check the version at the beginning of operations */ 1910 /* Check the version at the beginning of operations */
@@ -1970,10 +2016,11 @@ ip_set_net_init(struct net *net)
1970 if (inst->ip_set_max >= IPSET_INVALID_ID) 2016 if (inst->ip_set_max >= IPSET_INVALID_ID)
1971 inst->ip_set_max = IPSET_INVALID_ID - 1; 2017 inst->ip_set_max = IPSET_INVALID_ID - 1;
1972 2018
1973 list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); 2019 list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
1974 if (!list) 2020 if (!list)
1975 return -ENOMEM; 2021 return -ENOMEM;
1976 inst->is_deleted = 0; 2022 inst->is_deleted = false;
2023 inst->is_destroyed = false;
1977 rcu_assign_pointer(inst->ip_set_list, list); 2024 rcu_assign_pointer(inst->ip_set_list, list);
1978 return 0; 2025 return 0;
1979} 2026}
@@ -1986,12 +2033,14 @@ ip_set_net_exit(struct net *net)
1986 struct ip_set *set = NULL; 2033 struct ip_set *set = NULL;
1987 ip_set_id_t i; 2034 ip_set_id_t i;
1988 2035
1989 inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ 2036 inst->is_deleted = true; /* flag for ip_set_nfnl_put */
1990 2037
1991 for (i = 0; i < inst->ip_set_max; i++) { 2038 for (i = 0; i < inst->ip_set_max; i++) {
1992 set = ip_set(inst, i); 2039 set = ip_set(inst, i);
1993 if (set != NULL) 2040 if (set) {
1994 ip_set_destroy_set(inst, i); 2041 ip_set(inst, i) = NULL;
2042 ip_set_destroy_set(set);
2043 }
1995 } 2044 }
1996 kfree(rcu_dereference_protected(inst->ip_set_list, 1)); 2045 kfree(rcu_dereference_protected(inst->ip_set_list, 1));
1997} 2046}
@@ -2003,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = {
2003 .size = sizeof(struct ip_set_net) 2052 .size = sizeof(struct ip_set_net)
2004}; 2053};
2005 2054
2006
2007static int __init 2055static int __init
2008ip_set_init(void) 2056ip_set_init(void)
2009{ 2057{
2010 int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); 2058 int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
2059
2011 if (ret != 0) { 2060 if (ret != 0) {
2012 pr_err("ip_set: cannot register with nfnetlink.\n"); 2061 pr_err("ip_set: cannot register with nfnetlink.\n");
2013 return ret; 2062 return ret;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 29fb01ddff93..42c3e3ba1b94 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
30 const struct tcphdr *th; 30 const struct tcphdr *th;
31 31
32 th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); 32 th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
33 if (th == NULL) 33 if (!th)
34 /* No choice either */ 34 /* No choice either */
35 return false; 35 return false;
36 36
@@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
42 const sctp_sctphdr_t *sh; 42 const sctp_sctphdr_t *sh;
43 43
44 sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); 44 sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
45 if (sh == NULL) 45 if (!sh)
46 /* No choice either */ 46 /* No choice either */
47 return false; 47 return false;
48 48
@@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
55 const struct udphdr *uh; 55 const struct udphdr *uh;
56 56
57 uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); 57 uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
58 if (uh == NULL) 58 if (!uh)
59 /* No choice either */ 59 /* No choice either */
60 return false; 60 return false;
61 61
@@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
67 const struct icmphdr *ic; 67 const struct icmphdr *ic;
68 68
69 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); 69 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
70 if (ic == NULL) 70 if (!ic)
71 return false; 71 return false;
72 72
73 *port = (__force __be16)htons((ic->type << 8) | ic->code); 73 *port = (__force __be16)htons((ic->type << 8) | ic->code);
@@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
78 const struct icmp6hdr *ic; 78 const struct icmp6hdr *ic;
79 79
80 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); 80 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
81 if (ic == NULL) 81 if (!ic)
82 return false; 82 return false;
83 83
84 *port = (__force __be16) 84 *port = (__force __be16)
@@ -98,7 +98,7 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
98 __be16 *port, u8 *proto) 98 __be16 *port, u8 *proto)
99{ 99{
100 const struct iphdr *iph = ip_hdr(skb); 100 const struct iphdr *iph = ip_hdr(skb);
101 unsigned int protooff = ip_hdrlen(skb); 101 unsigned int protooff = skb_network_offset(skb) + ip_hdrlen(skb);
102 int protocol = iph->protocol; 102 int protocol = iph->protocol;
103 103
104 /* See comments at tcp_match in ip_tables.c */ 104 /* See comments at tcp_match in ip_tables.c */
@@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
116 return false; 116 return false;
117 default: 117 default:
118 /* Other protocols don't have ports, 118 /* Other protocols don't have ports,
119 so we can match fragments */ 119 * so we can match fragments.
120 */
120 *proto = protocol; 121 *proto = protocol;
121 return true; 122 return true;
122 } 123 }
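
The getport change is about where offsets are measured from: skb_header_pointer() expects an offset relative to skb->data, while ip_hdrlen() is only the IP header's own length, so the network header's offset has to be added when it does not start at skb->data. In plain arithmetic (a sketch, not kernel code):

/* Offset of the transport header, measured from the start of the
 * linear data, when the network header starts net_off bytes in. */
static unsigned int toy_transport_offset(unsigned int net_off,
                                         unsigned int ip_hdr_len)
{
        return net_off + ip_hdr_len;
}
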
@@ -135,7 +136,9 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
135 __be16 frag_off = 0; 136 __be16 frag_off = 0;
136 137
137 nexthdr = ipv6_hdr(skb)->nexthdr; 138 nexthdr = ipv6_hdr(skb)->nexthdr;
138 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 139 protoff = ipv6_skip_exthdr(skb,
140 skb_network_offset(skb) +
141 sizeof(struct ipv6hdr), &nexthdr,
139 &frag_off); 142 &frag_off);
140 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) 143 if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
141 return false; 144 return false;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 974ff386db0f..afe905c208af 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -10,19 +10,19 @@
10 10
11#include <linux/rcupdate.h> 11#include <linux/rcupdate.h>
12#include <linux/jhash.h> 12#include <linux/jhash.h>
13#include <linux/types.h>
13#include <linux/netfilter/ipset/ip_set_timeout.h> 14#include <linux/netfilter/ipset/ip_set_timeout.h>
14#ifndef rcu_dereference_bh 15
15#define rcu_dereference_bh(p) rcu_dereference(p) 16#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
16#endif 17#define ipset_dereference_protected(p, set) \
18 __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
17 19
18#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) 20#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
19 21
20/* Hashing which uses arrays to resolve clashing. The hash table is resized 22/* Hashing which uses arrays to resolve clashing. The hash table is resized
21 * (doubled) when searching becomes too long. 23 * (doubled) when searching becomes too long.
22 * Internally jhash is used with the assumption that the size of the 24 * Internally jhash is used with the assumption that the size of the
23 * stored data is a multiple of sizeof(u32). If storage supports timeout, 25 * stored data is a multiple of sizeof(u32).
24 * the timeout field must be the last one in the data structure - that field
25 * is ignored when computing the hash key.
26 * 26 *
27 * Readers and resizing 27 * Readers and resizing
28 * 28 *
@@ -35,7 +35,9 @@
35/* Number of elements to store in an initial array block */ 35/* Number of elements to store in an initial array block */
36#define AHASH_INIT_SIZE 4 36#define AHASH_INIT_SIZE 4
37/* Max number of elements to store in an array block */ 37/* Max number of elements to store in an array block */
38#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) 38#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE)
 39/* Max number of elements in the array block when tuned */
40#define AHASH_MAX_TUNED 64
39 41
 40/* Max number of elements that can be tuned */ 42/* Max number of elements that can be tuned */
41#ifdef IP_SET_HASH_WITH_MULTI 43#ifdef IP_SET_HASH_WITH_MULTI
@@ -53,8 +55,9 @@ tune_ahash_max(u8 curr, u32 multi)
53 /* Currently, at listing one hash bucket must fit into a message. 55 /* Currently, at listing one hash bucket must fit into a message.
54 * Therefore we have a hard limit here. 56 * Therefore we have a hard limit here.
55 */ 57 */
56 return n > curr && n <= 64 ? n : curr; 58 return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
57} 59}
60
58#define TUNE_AHASH_MAX(h, multi) \ 61#define TUNE_AHASH_MAX(h, multi) \
59 ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) 62 ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
60#else 63#else
@@ -64,18 +67,23 @@ tune_ahash_max(u8 curr, u32 multi)
64 67
65/* A hash bucket */ 68/* A hash bucket */
66struct hbucket { 69struct hbucket {
67 void *value; /* the array of the values */ 70 struct rcu_head rcu; /* for call_rcu_bh */
71 /* Which positions are used in the array */
72 DECLARE_BITMAP(used, AHASH_MAX_TUNED);
68 u8 size; /* size of the array */ 73 u8 size; /* size of the array */
69 u8 pos; /* position of the first free entry */ 74 u8 pos; /* position of the first free entry */
70}; 75 unsigned char value[0]; /* the array of the values */
76} __attribute__ ((aligned));
71 77
72/* The hash table: the table size stored here in order to make resizing easy */ 78/* The hash table: the table size stored here in order to make resizing easy */
73struct htable { 79struct htable {
80 atomic_t ref; /* References for resizing */
81 atomic_t uref; /* References for dumping */
74 u8 htable_bits; /* size of hash table == 2^htable_bits */ 82 u8 htable_bits; /* size of hash table == 2^htable_bits */
75 struct hbucket bucket[0]; /* hashtable buckets */ 83 struct hbucket __rcu *bucket[0]; /* hashtable buckets */
76}; 84};
77 85
78#define hbucket(h, i) (&((h)->bucket[i])) 86#define hbucket(h, i) ((h)->bucket[i])
79 87
80#ifndef IPSET_NET_COUNT 88#ifndef IPSET_NET_COUNT
81#define IPSET_NET_COUNT 1 89#define IPSET_NET_COUNT 1
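
The reworked bucket can be modelled as below (a sketch with assumed sizes, not the kernel struct): a bitmap marks live slots so deletion never has to move entries under concurrent RCU readers, and the values live in a flexible array appended to the header.

#include <limits.h>

#define TOY_MAX_TUNED  64
#define TOY_LONG_BITS  (sizeof(unsigned long) * CHAR_BIT)

struct toy_bucket {
        /* which of the pos slots are live */
        unsigned long used[TOY_MAX_TUNED / TOY_LONG_BITS];
        unsigned char size;     /* allocated slots */
        unsigned char pos;      /* first never-used slot */
        unsigned char value[];  /* size * dsize bytes of elements */
};
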
@@ -83,8 +91,8 @@ struct htable {
83 91
84/* Book-keeping of the prefixes added to the set */ 92/* Book-keeping of the prefixes added to the set */
85struct net_prefixes { 93struct net_prefixes {
86 u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ 94 u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
87 u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ 95 u8 cidr[IPSET_NET_COUNT]; /* the cidr value */
88}; 96};
89 97
90/* Compute the hash table size */ 98/* Compute the hash table size */
@@ -97,11 +105,11 @@ htable_size(u8 hbits)
97 if (hbits > 31) 105 if (hbits > 31)
98 return 0; 106 return 0;
99 hsize = jhash_size(hbits); 107 hsize = jhash_size(hbits);
100 if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket) 108 if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
101 < hsize) 109 < hsize)
102 return 0; 110 return 0;
103 111
104 return hsize * sizeof(struct hbucket) + sizeof(struct htable); 112 return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
105} 113}
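
htable_size() guards the multiplication against size_t overflow before allocating; the same guard in isolation (illustrative names):

#include <stddef.h>

/* Return 0 instead of letting header + hsize * slot wrap SIZE_MAX. */
static size_t toy_table_bytes(size_t header, size_t hsize, size_t slot)
{
        if ((((size_t)-1) - header) / slot < hsize)
                return 0;       /* would overflow */
        return hsize * slot + header;
}
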
106 114
107/* Compute htable_bits from the user input parameter hashsize */ 115/* Compute htable_bits from the user input parameter hashsize */
@@ -110,6 +118,7 @@ htable_bits(u32 hashsize)
110{ 118{
111 /* Assume that hashsize == 2^htable_bits */ 119 /* Assume that hashsize == 2^htable_bits */
112 u8 bits = fls(hashsize - 1); 120 u8 bits = fls(hashsize - 1);
121
113 if (jhash_size(bits) != hashsize) 122 if (jhash_size(bits) != hashsize)
114 /* Round up to the first 2^n value */ 123 /* Round up to the first 2^n value */
115 bits = fls(hashsize); 124 bits = fls(hashsize);
@@ -117,30 +126,6 @@ htable_bits(u32 hashsize)
117 return bits; 126 return bits;
118} 127}
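
htable_bits() rounds the user's hashsize up to a power of two using fls(); a portable stand-in (assuming 1 <= hashsize < 2^31) behaves the same way:

/* toy_fls: index of the highest set bit, 1-based (0 for v == 0). */
static unsigned char toy_fls(unsigned int v)
{
        unsigned char r = 0;

        while (v) {
                v >>= 1;
                r++;
        }
        return r;
}

static unsigned char toy_htable_bits(unsigned int hashsize)
{
        unsigned char bits = toy_fls(hashsize - 1);

        if ((1U << bits) != hashsize)
                bits = toy_fls(hashsize); /* round up to next power of two */
        return bits;
}
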
119 128
120static int
121hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
122{
123 if (n->pos >= n->size) {
124 void *tmp;
125
126 if (n->size >= ahash_max)
127 /* Trigger rehashing */
128 return -EAGAIN;
129
130 tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize,
131 GFP_ATOMIC);
132 if (!tmp)
133 return -ENOMEM;
134 if (n->size) {
135 memcpy(tmp, n->value, n->size * dsize);
136 kfree(n->value);
137 }
138 n->value = tmp;
139 n->size += AHASH_INIT_SIZE;
140 }
141 return 0;
142}
143
144#ifdef IP_SET_HASH_WITH_NETS 129#ifdef IP_SET_HASH_WITH_NETS
145#if IPSET_NET_COUNT > 1 130#if IPSET_NET_COUNT > 1
146#define __CIDR(cidr, i) (cidr[i]) 131#define __CIDR(cidr, i) (cidr[i])
@@ -149,17 +134,21 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
149#endif 134#endif
150 135
151/* cidr + 1 is stored in net_prefixes to support /0 */ 136/* cidr + 1 is stored in net_prefixes to support /0 */
152#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1) 137#define NCIDR_PUT(cidr) ((cidr) + 1)
138#define NCIDR_GET(cidr) ((cidr) - 1)
153 139
154#ifdef IP_SET_HASH_WITH_NETS_PACKED 140#ifdef IP_SET_HASH_WITH_NETS_PACKED
155/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ 141/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
156#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1) 142#define DCIDR_PUT(cidr) ((cidr) - 1)
157#define NCIDR(cidr) (cidr) 143#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1)
158#else 144#else
159#define GCIDR(cidr, i) (__CIDR(cidr, i)) 145#define DCIDR_PUT(cidr) (cidr)
160#define NCIDR(cidr) (cidr - 1) 146#define DCIDR_GET(cidr, i) __CIDR(cidr, i)
161#endif 147#endif
162 148
149#define INIT_CIDR(cidr, host_mask) \
150 DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
151
163#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) 152#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
164 153
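
The NCIDR_PUT/NCIDR_GET pair exists because a stored prefix of 0 doubles as the "empty slot" marker; keeping cidr + 1 lets a real /0 be represented without colliding with it. As a self-checking miniature:

#include <assert.h>

#define TOY_PUT(cidr) ((cidr) + 1)     /* store */
#define TOY_GET(cidr) ((cidr) - 1)     /* load  */

int main(void)
{
        unsigned char slot = 0;        /* 0 == empty */

        slot = TOY_PUT(0);             /* insert a /0 prefix */
        assert(slot != 0);             /* still distinct from empty */
        assert(TOY_GET(slot) == 0);    /* round-trips to /0 */
        return 0;
}
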
165#ifdef IP_SET_HASH_WITH_NET0 154#ifdef IP_SET_HASH_WITH_NET0
@@ -180,6 +169,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
180#undef mtype_data_equal 169#undef mtype_data_equal
181#undef mtype_do_data_match 170#undef mtype_do_data_match
182#undef mtype_data_set_flags 171#undef mtype_data_set_flags
172#undef mtype_data_reset_elem
183#undef mtype_data_reset_flags 173#undef mtype_data_reset_flags
184#undef mtype_data_netmask 174#undef mtype_data_netmask
185#undef mtype_data_list 175#undef mtype_data_list
@@ -193,7 +183,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
193#undef mtype_ahash_memsize 183#undef mtype_ahash_memsize
194#undef mtype_flush 184#undef mtype_flush
195#undef mtype_destroy 185#undef mtype_destroy
196#undef mtype_gc_init
197#undef mtype_same_set 186#undef mtype_same_set
198#undef mtype_kadt 187#undef mtype_kadt
199#undef mtype_uadt 188#undef mtype_uadt
@@ -203,6 +192,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
203#undef mtype_del 192#undef mtype_del
204#undef mtype_test_cidrs 193#undef mtype_test_cidrs
205#undef mtype_test 194#undef mtype_test
195#undef mtype_uref
206#undef mtype_expire 196#undef mtype_expire
207#undef mtype_resize 197#undef mtype_resize
208#undef mtype_head 198#undef mtype_head
@@ -227,6 +217,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
227#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) 217#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list)
228#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) 218#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next)
229#define mtype_elem IPSET_TOKEN(MTYPE, _elem) 219#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
220
230#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) 221#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy)
231#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) 222#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
232#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) 223#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr)
@@ -234,7 +225,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
234#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) 225#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize)
235#define mtype_flush IPSET_TOKEN(MTYPE, _flush) 226#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
236#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) 227#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
237#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
238#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) 228#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
239#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) 229#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
240#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) 230#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
@@ -244,23 +234,36 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
244#define mtype_del IPSET_TOKEN(MTYPE, _del) 234#define mtype_del IPSET_TOKEN(MTYPE, _del)
245#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) 235#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
246#define mtype_test IPSET_TOKEN(MTYPE, _test) 236#define mtype_test IPSET_TOKEN(MTYPE, _test)
237#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
247#define mtype_expire IPSET_TOKEN(MTYPE, _expire) 238#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
248#define mtype_resize IPSET_TOKEN(MTYPE, _resize) 239#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
249#define mtype_head IPSET_TOKEN(MTYPE, _head) 240#define mtype_head IPSET_TOKEN(MTYPE, _head)
250#define mtype_list IPSET_TOKEN(MTYPE, _list) 241#define mtype_list IPSET_TOKEN(MTYPE, _list)
251#define mtype_gc IPSET_TOKEN(MTYPE, _gc) 242#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
243#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
252#define mtype_variant IPSET_TOKEN(MTYPE, _variant) 244#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
253#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) 245#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
254 246
247#ifndef MTYPE
248#error "MTYPE is not defined!"
249#endif
250
251#ifndef HOST_MASK
252#error "HOST_MASK is not defined!"
253#endif
254
255#ifndef HKEY_DATALEN 255#ifndef HKEY_DATALEN
256#define HKEY_DATALEN sizeof(struct mtype_elem) 256#define HKEY_DATALEN sizeof(struct mtype_elem)
257#endif 257#endif
258 258
259#define HKEY(data, initval, htable_bits) \ 259#define HKEY(data, initval, htable_bits) \
260(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ 260(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \
261 & jhash_mask(htable_bits)) 261 & jhash_mask(htable_bits))
262 262
263#ifndef htype 263#ifndef htype
264#ifndef HTYPE
265#error "HTYPE is not defined!"
266#endif /* HTYPE */
264#define htype HTYPE 267#define htype HTYPE
265 268
266/* The generic hash structure */ 269/* The generic hash structure */
@@ -280,18 +283,16 @@ struct htype {
280#ifdef IP_SET_HASH_WITH_NETMASK 283#ifdef IP_SET_HASH_WITH_NETMASK
281 u8 netmask; /* netmask value for subnets to store */ 284 u8 netmask; /* netmask value for subnets to store */
282#endif 285#endif
283#ifdef IP_SET_HASH_WITH_RBTREE
284 struct rb_root rbtree;
285#endif
286#ifdef IP_SET_HASH_WITH_NETS 286#ifdef IP_SET_HASH_WITH_NETS
287 struct net_prefixes nets[0]; /* book-keeping of prefixes */ 287 struct net_prefixes nets[0]; /* book-keeping of prefixes */
288#endif 288#endif
289}; 289};
290#endif 290#endif /* htype */
291 291
292#ifdef IP_SET_HASH_WITH_NETS 292#ifdef IP_SET_HASH_WITH_NETS
293/* Network cidr size book keeping when the hash stores different 293/* Network cidr size book keeping when the hash stores different
294 * sized networks */ 294 * sized networks. cidr == real cidr + 1 to support /0.
295 */
295static void 296static void
296mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) 297mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
297{ 298{
@@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
299 300
300 /* Add in increasing prefix order, so larger cidr first */ 301 /* Add in increasing prefix order, so larger cidr first */
301 for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { 302 for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
302 if (j != -1) 303 if (j != -1) {
303 continue; 304 continue;
304 else if (h->nets[i].cidr[n] < cidr) 305 } else if (h->nets[i].cidr[n] < cidr) {
305 j = i; 306 j = i;
306 else if (h->nets[i].cidr[n] == cidr) { 307 } else if (h->nets[i].cidr[n] == cidr) {
307 h->nets[cidr - 1].nets[n]++; 308 h->nets[cidr - 1].nets[n]++;
308 return; 309 return;
309 } 310 }
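
mtype_add_cidr() keeps the nets[] book-keeping sorted so that more specific prefixes (larger cidr) come first, and simply bumps the per-cidr element count when the prefix is already present. A compact userspace sketch with an inline count (the kernel indexes counts by cidr - 1 instead):

#include <string.h>

#define TOY_NETS 8

struct toy_prefix {
        unsigned char cidr;     /* 0 == unused slot */
        unsigned int count;     /* elements under this prefix */
};

static void toy_add_cidr(struct toy_prefix nets[TOY_NETS], unsigned char cidr)
{
        int i, j = -1;

        for (i = 0; i < TOY_NETS && nets[i].cidr; i++) {
                if (nets[i].cidr == cidr) {
                        nets[i].count++;        /* prefix already tracked */
                        return;
                }
                if (j == -1 && nets[i].cidr < cidr)
                        j = i;                  /* keep larger cidr first */
        }
        if (i == TOY_NETS)
                return;         /* full; the real table is sized to fit */
        if (j == -1)
                j = i;
        else
                memmove(&nets[j + 1], &nets[j], (i - j) * sizeof(nets[0]));
        nets[j].cidr = cidr;
        nets[j].count = 1;
}
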
@@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
322 u8 i, j, net_end = nets_length - 1; 323 u8 i, j, net_end = nets_length - 1;
323 324
324 for (i = 0; i < nets_length; i++) { 325 for (i = 0; i < nets_length; i++) {
325 if (h->nets[i].cidr[n] != cidr) 326 if (h->nets[i].cidr[n] != cidr)
326 continue; 327 continue;
327 h->nets[cidr -1].nets[n]--; 328 h->nets[cidr - 1].nets[n]--;
328 if (h->nets[cidr -1].nets[n] > 0) 329 if (h->nets[cidr - 1].nets[n] > 0)
329 return; 330 return;
330 for (j = i; j < net_end && h->nets[j].cidr[n]; j++) 331 for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
331 h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; 332 h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
332 h->nets[j].cidr[n] = 0; 333 h->nets[j].cidr[n] = 0;
333 return; 334 return;
334 } 335 }
335} 336}
336#endif 337#endif
@@ -341,15 +342,18 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
341 u8 nets_length, size_t dsize) 342 u8 nets_length, size_t dsize)
342{ 343{
343 u32 i; 344 u32 i;
344 size_t memsize = sizeof(*h) 345 struct hbucket *n;
345 + sizeof(*t) 346 size_t memsize = sizeof(*h) + sizeof(*t);
347
346#ifdef IP_SET_HASH_WITH_NETS 348#ifdef IP_SET_HASH_WITH_NETS
347 + sizeof(struct net_prefixes) * nets_length 349 memsize += sizeof(struct net_prefixes) * nets_length;
348#endif 350#endif
349 + jhash_size(t->htable_bits) * sizeof(struct hbucket); 351 for (i = 0; i < jhash_size(t->htable_bits); i++) {
350 352 n = rcu_dereference_bh(hbucket(t, i));
351 for (i = 0; i < jhash_size(t->htable_bits); i++) 353 if (!n)
352 memsize += t->bucket[i].size * dsize; 354 continue;
355 memsize += sizeof(struct hbucket) + n->size * dsize;
356 }
353 357
354 return memsize; 358 return memsize;
355} 359}
@@ -364,7 +368,8 @@ mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
364 int i; 368 int i;
365 369
366 for (i = 0; i < n->pos; i++) 370 for (i = 0; i < n->pos; i++)
367 ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); 371 if (test_bit(i, n->used))
372 ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
368} 373}
369 374
370/* Flush a hash type of set: destroy all elements */ 375/* Flush a hash type of set: destroy all elements */
@@ -376,16 +381,16 @@ mtype_flush(struct ip_set *set)
376 struct hbucket *n; 381 struct hbucket *n;
377 u32 i; 382 u32 i;
378 383
379 t = rcu_dereference_bh_nfnl(h->table); 384 t = ipset_dereference_protected(h->table, set);
380 for (i = 0; i < jhash_size(t->htable_bits); i++) { 385 for (i = 0; i < jhash_size(t->htable_bits); i++) {
381 n = hbucket(t, i); 386 n = __ipset_dereference_protected(hbucket(t, i), 1);
382 if (n->size) { 387 if (!n)
383 if (set->extensions & IPSET_EXT_DESTROY) 388 continue;
384 mtype_ext_cleanup(set, n); 389 if (set->extensions & IPSET_EXT_DESTROY)
385 n->size = n->pos = 0; 390 mtype_ext_cleanup(set, n);
386 /* FIXME: use slab cache */ 391 /* FIXME: use slab cache */
387 kfree(n->value); 392 rcu_assign_pointer(hbucket(t, i), NULL);
388 } 393 kfree_rcu(n, rcu);
389 } 394 }
390#ifdef IP_SET_HASH_WITH_NETS 395#ifdef IP_SET_HASH_WITH_NETS
391 memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); 396 memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
@@ -401,13 +406,13 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
401 u32 i; 406 u32 i;
402 407
403 for (i = 0; i < jhash_size(t->htable_bits); i++) { 408 for (i = 0; i < jhash_size(t->htable_bits); i++) {
404 n = hbucket(t, i); 409 n = __ipset_dereference_protected(hbucket(t, i), 1);
405 if (n->size) { 410 if (!n)
406 if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) 411 continue;
407 mtype_ext_cleanup(set, n); 412 if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
408 /* FIXME: use slab cache */ 413 mtype_ext_cleanup(set, n);
409 kfree(n->value); 414 /* FIXME: use slab cache */
410 } 415 kfree(n);
411 } 416 }
412 417
413 ip_set_free(t); 418 ip_set_free(t);
@@ -419,13 +424,11 @@ mtype_destroy(struct ip_set *set)
419{ 424{
420 struct htype *h = set->data; 425 struct htype *h = set->data;
421 426
422 if (set->extensions & IPSET_EXT_TIMEOUT) 427 if (SET_WITH_TIMEOUT(set))
423 del_timer_sync(&h->gc); 428 del_timer_sync(&h->gc);
424 429
425 mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); 430 mtype_ahash_destroy(set,
426#ifdef IP_SET_HASH_WITH_RBTREE 431 __ipset_dereference_protected(h->table, 1), true);
427 rbtree_destroy(&h->rbtree);
428#endif
429 kfree(h); 432 kfree(h);
430 433
431 set->data = NULL; 434 set->data = NULL;
@@ -437,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
437 struct htype *h = set->data; 440 struct htype *h = set->data;
438 441
439 init_timer(&h->gc); 442 init_timer(&h->gc);
440 h->gc.data = (unsigned long) set; 443 h->gc.data = (unsigned long)set;
441 h->gc.function = gc; 444 h->gc.function = gc;
442 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 445 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
443 add_timer(&h->gc); 446 add_timer(&h->gc);
@@ -470,61 +473,71 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
470 struct htable *t; 473 struct htable *t;
471 struct hbucket *n; 474 struct hbucket *n;
472 struct mtype_elem *data; 475 struct mtype_elem *data;
473 u32 i; 476 u32 i, j, d;
474 int j;
475#ifdef IP_SET_HASH_WITH_NETS 477#ifdef IP_SET_HASH_WITH_NETS
476 u8 k; 478 u8 k;
477#endif 479#endif
478 480
479 rcu_read_lock_bh(); 481 t = ipset_dereference_protected(h->table, set);
480 t = rcu_dereference_bh(h->table);
481 for (i = 0; i < jhash_size(t->htable_bits); i++) { 482 for (i = 0; i < jhash_size(t->htable_bits); i++) {
482 n = hbucket(t, i); 483 n = __ipset_dereference_protected(hbucket(t, i), 1);
483 for (j = 0; j < n->pos; j++) { 484 if (!n)
485 continue;
486 for (j = 0, d = 0; j < n->pos; j++) {
487 if (!test_bit(j, n->used)) {
488 d++;
489 continue;
490 }
484 data = ahash_data(n, j, dsize); 491 data = ahash_data(n, j, dsize);
485 if (ip_set_timeout_expired(ext_timeout(data, set))) { 492 if (ip_set_timeout_expired(ext_timeout(data, set))) {
486 pr_debug("expired %u/%u\n", i, j); 493 pr_debug("expired %u/%u\n", i, j);
494 clear_bit(j, n->used);
495 smp_mb__after_atomic();
487#ifdef IP_SET_HASH_WITH_NETS 496#ifdef IP_SET_HASH_WITH_NETS
488 for (k = 0; k < IPSET_NET_COUNT; k++) 497 for (k = 0; k < IPSET_NET_COUNT; k++)
489 mtype_del_cidr(h, SCIDR(data->cidr, k), 498 mtype_del_cidr(h,
490 nets_length, k); 499 NCIDR_PUT(DCIDR_GET(data->cidr,
500 k)),
501 nets_length, k);
491#endif 502#endif
492 ip_set_ext_destroy(set, data); 503 ip_set_ext_destroy(set, data);
493 if (j != n->pos - 1)
494 /* Not last one */
495 memcpy(data,
496 ahash_data(n, n->pos - 1, dsize),
497 dsize);
498 n->pos--;
499 h->elements--; 504 h->elements--;
505 d++;
500 } 506 }
501 } 507 }
502 if (n->pos + AHASH_INIT_SIZE < n->size) { 508 if (d >= AHASH_INIT_SIZE) {
503 void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) 509 struct hbucket *tmp = kzalloc(sizeof(*tmp) +
504 * dsize, 510 (n->size - AHASH_INIT_SIZE) * dsize,
505 GFP_ATOMIC); 511 GFP_ATOMIC);
506 if (!tmp) 512 if (!tmp)
507 /* Still try to delete expired elements */ 513 /* Still try to delete expired elements */
508 continue; 514 continue;
509 n->size -= AHASH_INIT_SIZE; 515 tmp->size = n->size - AHASH_INIT_SIZE;
510 memcpy(tmp, n->value, n->size * dsize); 516 for (j = 0, d = 0; j < n->pos; j++) {
511 kfree(n->value); 517 if (!test_bit(j, n->used))
512 n->value = tmp; 518 continue;
519 data = ahash_data(n, j, dsize);
520 memcpy(tmp->value + d * dsize, data, dsize);
521 set_bit(j, tmp->used);
522 d++;
523 }
524 tmp->pos = d;
525 rcu_assign_pointer(hbucket(t, i), tmp);
526 kfree_rcu(n, rcu);
513 } 527 }
514 } 528 }
515 rcu_read_unlock_bh();
516} 529}
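
With lockless readers, mtype_expire() can no longer compact a bucket by copying the last element over the expired one; instead it clears the slot's used bit and, once at least AHASH_INIT_SIZE slots are dead, rebuilds a smaller bucket and publishes it via RCU. The deletion step alone, sketched with toy names:

#include <limits.h>

#define TOY_LONG_BITS (sizeof(unsigned long) * CHAR_BIT)

/* Mark a slot dead without moving live neighbours; returns how many
 * deletions the caller has now accumulated in this bucket. */
static unsigned int toy_expire_slot(unsigned long *used, unsigned int slot,
                                    unsigned int deleted)
{
        used[slot / TOY_LONG_BITS] &= ~(1UL << (slot % TOY_LONG_BITS));
        return deleted + 1;
}
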
517 530
518static void 531static void
519mtype_gc(unsigned long ul_set) 532mtype_gc(unsigned long ul_set)
520{ 533{
521 struct ip_set *set = (struct ip_set *) ul_set; 534 struct ip_set *set = (struct ip_set *)ul_set;
522 struct htype *h = set->data; 535 struct htype *h = set->data;
523 536
524 pr_debug("called\n"); 537 pr_debug("called\n");
525 write_lock_bh(&set->lock); 538 spin_lock_bh(&set->lock);
526 mtype_expire(set, h, NLEN(set->family), set->dsize); 539 mtype_expire(set, h, NLEN(set->family), set->dsize);
527 write_unlock_bh(&set->lock); 540 spin_unlock_bh(&set->lock);
528 541
529 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 542 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
530 add_timer(&h->gc); 543 add_timer(&h->gc);
@@ -532,93 +545,152 @@ mtype_gc(unsigned long ul_set)
532 545
533/* Resize a hash: create a new hash table with doubling the hashsize 546/* Resize a hash: create a new hash table with doubling the hashsize
534 * and inserting the elements into it. Repeat until we succeed or 547 * and inserting the elements into it. Repeat until we succeed or
535 * fail due to memory pressure. */ 548 * fail due to memory pressure.
549 */
536static int 550static int
537mtype_resize(struct ip_set *set, bool retried) 551mtype_resize(struct ip_set *set, bool retried)
538{ 552{
539 struct htype *h = set->data; 553 struct htype *h = set->data;
540 struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table); 554 struct htable *t, *orig;
541 u8 htable_bits = orig->htable_bits; 555 u8 htable_bits;
556 size_t dsize = set->dsize;
542#ifdef IP_SET_HASH_WITH_NETS 557#ifdef IP_SET_HASH_WITH_NETS
543 u8 flags; 558 u8 flags;
559 struct mtype_elem *tmp;
544#endif 560#endif
545 struct mtype_elem *data; 561 struct mtype_elem *data;
546 struct mtype_elem *d; 562 struct mtype_elem *d;
547 struct hbucket *n, *m; 563 struct hbucket *n, *m;
548 u32 i, j; 564 u32 i, j, key;
549 int ret; 565 int ret;
550 566
551 /* Try to cleanup once */ 567#ifdef IP_SET_HASH_WITH_NETS
552 if (SET_WITH_TIMEOUT(set) && !retried) { 568 tmp = kmalloc(dsize, GFP_KERNEL);
553 i = h->elements; 569 if (!tmp)
554 write_lock_bh(&set->lock); 570 return -ENOMEM;
555 mtype_expire(set, set->data, NLEN(set->family), set->dsize); 571#endif
556 write_unlock_bh(&set->lock); 572 rcu_read_lock_bh();
557 if (h->elements < i) 573 orig = rcu_dereference_bh_nfnl(h->table);
558 return 0; 574 htable_bits = orig->htable_bits;
559 } 575 rcu_read_unlock_bh();
560 576
561retry: 577retry:
562 ret = 0; 578 ret = 0;
563 htable_bits++; 579 htable_bits++;
564 pr_debug("attempt to resize set %s from %u to %u, t %p\n",
565 set->name, orig->htable_bits, htable_bits, orig);
566 if (!htable_bits) { 580 if (!htable_bits) {
567 /* In case we have plenty of memory :-) */ 581 /* In case we have plenty of memory :-) */
568 pr_warn("Cannot increase the hashsize of set %s further\n", 582 pr_warn("Cannot increase the hashsize of set %s further\n",
569 set->name); 583 set->name);
570 return -IPSET_ERR_HASH_FULL; 584 ret = -IPSET_ERR_HASH_FULL;
585 goto out;
586 }
587 t = ip_set_alloc(htable_size(htable_bits));
588 if (!t) {
589 ret = -ENOMEM;
590 goto out;
571 } 591 }
572 t = ip_set_alloc(sizeof(*t)
573 + jhash_size(htable_bits) * sizeof(struct hbucket));
574 if (!t)
575 return -ENOMEM;
576 t->htable_bits = htable_bits; 592 t->htable_bits = htable_bits;
577 593
578 read_lock_bh(&set->lock); 594 spin_lock_bh(&set->lock);
595 orig = __ipset_dereference_protected(h->table, 1);
596 /* There can't be another parallel resizing, but dumping is possible */
597 atomic_set(&orig->ref, 1);
598 atomic_inc(&orig->uref);
599 pr_debug("attempt to resize set %s from %u to %u, t %p\n",
600 set->name, orig->htable_bits, htable_bits, orig);
579 for (i = 0; i < jhash_size(orig->htable_bits); i++) { 601 for (i = 0; i < jhash_size(orig->htable_bits); i++) {
580 n = hbucket(orig, i); 602 n = __ipset_dereference_protected(hbucket(orig, i), 1);
603 if (!n)
604 continue;
581 for (j = 0; j < n->pos; j++) { 605 for (j = 0; j < n->pos; j++) {
582 data = ahash_data(n, j, set->dsize); 606 if (!test_bit(j, n->used))
607 continue;
608 data = ahash_data(n, j, dsize);
583#ifdef IP_SET_HASH_WITH_NETS 609#ifdef IP_SET_HASH_WITH_NETS
610 /* We have readers running in parallel with us,
611 * so the live data cannot be modified.
612 */
584 flags = 0; 613 flags = 0;
614 memcpy(tmp, data, dsize);
615 data = tmp;
585 mtype_data_reset_flags(data, &flags); 616 mtype_data_reset_flags(data, &flags);
586#endif 617#endif
587 m = hbucket(t, HKEY(data, h->initval, htable_bits)); 618 key = HKEY(data, h->initval, htable_bits);
588 ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); 619 m = __ipset_dereference_protected(hbucket(t, key), 1);
589 if (ret < 0) { 620 if (!m) {
590#ifdef IP_SET_HASH_WITH_NETS 621 m = kzalloc(sizeof(*m) +
591 mtype_data_reset_flags(data, &flags); 622 AHASH_INIT_SIZE * dsize,
592#endif 623 GFP_ATOMIC);
593 read_unlock_bh(&set->lock); 624 if (!m) {
594 mtype_ahash_destroy(set, t, false); 625 ret = -ENOMEM;
595 if (ret == -EAGAIN) 626 goto cleanup;
596 goto retry; 627 }
597 return ret; 628 m->size = AHASH_INIT_SIZE;
629 RCU_INIT_POINTER(hbucket(t, key), m);
630 } else if (m->pos >= m->size) {
631 struct hbucket *ht;
632
633 if (m->size >= AHASH_MAX(h)) {
634 ret = -EAGAIN;
635 } else {
636 ht = kzalloc(sizeof(*ht) +
637 (m->size + AHASH_INIT_SIZE)
638 * dsize,
639 GFP_ATOMIC);
640 if (!ht)
641 ret = -ENOMEM;
642 }
643 if (ret < 0)
644 goto cleanup;
645 memcpy(ht, m, sizeof(struct hbucket) +
646 m->size * dsize);
647 ht->size = m->size + AHASH_INIT_SIZE;
648 kfree(m);
649 m = ht;
650 RCU_INIT_POINTER(hbucket(t, key), ht);
598 } 651 }
599 d = ahash_data(m, m->pos++, set->dsize); 652 d = ahash_data(m, m->pos, dsize);
600 memcpy(d, data, set->dsize); 653 memcpy(d, data, dsize);
654 set_bit(m->pos++, m->used);
601#ifdef IP_SET_HASH_WITH_NETS 655#ifdef IP_SET_HASH_WITH_NETS
602 mtype_data_reset_flags(d, &flags); 656 mtype_data_reset_flags(d, &flags);
603#endif 657#endif
604 } 658 }
605 } 659 }
606
607 rcu_assign_pointer(h->table, t); 660 rcu_assign_pointer(h->table, t);
608 read_unlock_bh(&set->lock); 661
662 spin_unlock_bh(&set->lock);
609 663
610 /* Give time to other readers of the set */ 664 /* Give time to other readers of the set */
611 synchronize_rcu_bh(); 665 synchronize_rcu_bh();
612 666
613 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, 667 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
614 orig->htable_bits, orig, t->htable_bits, t); 668 orig->htable_bits, orig, t->htable_bits, t);
615 mtype_ahash_destroy(set, orig, false); 669 /* If there's nobody else dumping the table, destroy it */
670 if (atomic_dec_and_test(&orig->uref)) {
671 pr_debug("Table destroy by resize %p\n", orig);
672 mtype_ahash_destroy(set, orig, false);
673 }
616 674
617 return 0; 675out:
676#ifdef IP_SET_HASH_WITH_NETS
677 kfree(tmp);
678#endif
679 return ret;
680
681cleanup:
682 atomic_set(&orig->ref, 0);
683 atomic_dec(&orig->uref);
684 spin_unlock_bh(&set->lock);
685 mtype_ahash_destroy(set, t, false);
686 if (ret == -EAGAIN)
687 goto retry;
688 goto out;
618} 689}
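
The end of the new resize path is a classic RCU handover: publish the new table with rcu_assign_pointer(), wait out the readers with synchronize_rcu_bh(), then free the old table only if no dumper still holds a uref on it. The last step, modelled with C11 atomics (toy names, not the kernel's):

#include <stdatomic.h>
#include <stdlib.h>

struct toy_table {
        atomic_int uref;        /* references held by dumpers */
};

/* Called after the new table is visible and a grace period passed. */
static void toy_resize_put_old(struct toy_table *old)
{
        if (atomic_fetch_sub(&old->uref, 1) == 1)
                free(old);      /* we held the last reference */
}
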
619 690
620/* Add an element to a hash and update the internal counters when it succeeds, 691/* Add an element to a hash and update the internal counters when it succeeds,
621 * otherwise report the proper error code. */ 692 * otherwise report the proper error code.
693 */
622static int 694static int
623mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, 695mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
624 struct ip_set_ext *mext, u32 flags) 696 struct ip_set_ext *mext, u32 flags)
@@ -627,17 +699,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
627 struct htable *t; 699 struct htable *t;
628 const struct mtype_elem *d = value; 700 const struct mtype_elem *d = value;
629 struct mtype_elem *data; 701 struct mtype_elem *data;
630 struct hbucket *n; 702 struct hbucket *n, *old = ERR_PTR(-ENOENT);
631 int i, ret = 0; 703 int i, j = -1;
632 int j = AHASH_MAX(h) + 1;
633 bool flag_exist = flags & IPSET_FLAG_EXIST; 704 bool flag_exist = flags & IPSET_FLAG_EXIST;
705 bool deleted = false, forceadd = false, reuse = false;
634 u32 key, multi = 0; 706 u32 key, multi = 0;
635 707
636 rcu_read_lock_bh(); 708 if (h->elements >= h->maxelem) {
637 t = rcu_dereference_bh(h->table); 709 if (SET_WITH_TIMEOUT(set))
710 /* FIXME: when set is full, we slow down here */
711 mtype_expire(set, h, NLEN(set->family), set->dsize);
712 if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
713 forceadd = true;
714 }
715
716 t = ipset_dereference_protected(h->table, set);
638 key = HKEY(value, h->initval, t->htable_bits); 717 key = HKEY(value, h->initval, t->htable_bits);
639 n = hbucket(t, key); 718 n = __ipset_dereference_protected(hbucket(t, key), 1);
719 if (!n) {
720 if (forceadd) {
721 if (net_ratelimit())
722 pr_warn("Set %s is full, maxelem %u reached\n",
723 set->name, h->maxelem);
724 return -IPSET_ERR_HASH_FULL;
725 } else if (h->elements >= h->maxelem) {
726 goto set_full;
727 }
728 old = NULL;
729 n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
730 GFP_ATOMIC);
731 if (!n)
732 return -ENOMEM;
733 n->size = AHASH_INIT_SIZE;
734 goto copy_elem;
735 }
640 for (i = 0; i < n->pos; i++) { 736 for (i = 0; i < n->pos; i++) {
737 if (!test_bit(i, n->used)) {
738 /* Reuse first deleted entry */
739 if (j == -1) {
740 deleted = reuse = true;
741 j = i;
742 }
743 continue;
744 }
641 data = ahash_data(n, i, set->dsize); 745 data = ahash_data(n, i, set->dsize);
642 if (mtype_data_equal(data, d, &multi)) { 746 if (mtype_data_equal(data, d, &multi)) {
643 if (flag_exist || 747 if (flag_exist ||
@@ -645,85 +749,94 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
645 ip_set_timeout_expired(ext_timeout(data, set)))) { 749 ip_set_timeout_expired(ext_timeout(data, set)))) {
646 /* Just the extensions could be overwritten */ 750 /* Just the extensions could be overwritten */
647 j = i; 751 j = i;
648 goto reuse_slot; 752 goto overwrite_extensions;
649 } else {
650 ret = -IPSET_ERR_EXIST;
651 goto out;
652 } 753 }
754 return -IPSET_ERR_EXIST;
653 } 755 }
654 /* Reuse first timed out entry */ 756 /* Reuse first timed out entry */
655 if (SET_WITH_TIMEOUT(set) && 757 if (SET_WITH_TIMEOUT(set) &&
656 ip_set_timeout_expired(ext_timeout(data, set)) && 758 ip_set_timeout_expired(ext_timeout(data, set)) &&
657 j != AHASH_MAX(h) + 1) 759 j == -1) {
658 j = i; 760 j = i;
761 reuse = true;
762 }
659 } 763 }
660 if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set) && n->pos) { 764 if (reuse || forceadd) {
661 /* Choosing the first entry in the array to replace */
662 j = 0;
663 goto reuse_slot;
664 }
665 if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
666 /* FIXME: when set is full, we slow down here */
667 mtype_expire(set, h, NLEN(set->family), set->dsize);
668
669 if (h->elements >= h->maxelem) {
670 if (net_ratelimit())
671 pr_warn("Set %s is full, maxelem %u reached\n",
672 set->name, h->maxelem);
673 ret = -IPSET_ERR_HASH_FULL;
674 goto out;
675 }
676
677reuse_slot:
678 if (j != AHASH_MAX(h) + 1) {
679 /* Fill out reused slot */
680 data = ahash_data(n, j, set->dsize); 765 data = ahash_data(n, j, set->dsize);
766 if (!deleted) {
681#ifdef IP_SET_HASH_WITH_NETS 767#ifdef IP_SET_HASH_WITH_NETS
682 for (i = 0; i < IPSET_NET_COUNT; i++) { 768 for (i = 0; i < IPSET_NET_COUNT; i++)
683 mtype_del_cidr(h, SCIDR(data->cidr, i), 769 mtype_del_cidr(h,
684 NLEN(set->family), i); 770 NCIDR_PUT(DCIDR_GET(data->cidr, i)),
685 mtype_add_cidr(h, SCIDR(d->cidr, i), 771 NLEN(set->family), i);
686 NLEN(set->family), i);
687 }
688#endif 772#endif
689 ip_set_ext_destroy(set, data); 773 ip_set_ext_destroy(set, data);
690 } else { 774 h->elements--;
691 /* Use/create a new slot */ 775 }
776 goto copy_data;
777 }
778 if (h->elements >= h->maxelem)
779 goto set_full;
780 /* Create a new slot */
781 if (n->pos >= n->size) {
692 TUNE_AHASH_MAX(h, multi); 782 TUNE_AHASH_MAX(h, multi);
693 ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); 783 if (n->size >= AHASH_MAX(h)) {
694 if (ret != 0) { 784 /* Trigger rehashing */
695 if (ret == -EAGAIN) 785 mtype_data_next(&h->next, d);
696 mtype_data_next(&h->next, d); 786 return -EAGAIN;
697 goto out;
698 } 787 }
699 data = ahash_data(n, n->pos++, set->dsize); 788 old = n;
789 n = kzalloc(sizeof(*n) +
790 (old->size + AHASH_INIT_SIZE) * set->dsize,
791 GFP_ATOMIC);
792 if (!n)
793 return -ENOMEM;
794 memcpy(n, old, sizeof(struct hbucket) +
795 old->size * set->dsize);
796 n->size = old->size + AHASH_INIT_SIZE;
797 }
798
799copy_elem:
800 j = n->pos++;
801 data = ahash_data(n, j, set->dsize);
802copy_data:
803 h->elements++;
700#ifdef IP_SET_HASH_WITH_NETS 804#ifdef IP_SET_HASH_WITH_NETS
701 for (i = 0; i < IPSET_NET_COUNT; i++) 805 for (i = 0; i < IPSET_NET_COUNT; i++)
702 mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family), 806 mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
703 i); 807 NLEN(set->family), i);
704#endif 808#endif
705 h->elements++;
706 }
707 memcpy(data, d, sizeof(struct mtype_elem)); 809 memcpy(data, d, sizeof(struct mtype_elem));
810overwrite_extensions:
708#ifdef IP_SET_HASH_WITH_NETS 811#ifdef IP_SET_HASH_WITH_NETS
709 mtype_data_set_flags(data, flags); 812 mtype_data_set_flags(data, flags);
710#endif 813#endif
711 if (SET_WITH_TIMEOUT(set))
712 ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
713 if (SET_WITH_COUNTER(set)) 814 if (SET_WITH_COUNTER(set))
714 ip_set_init_counter(ext_counter(data, set), ext); 815 ip_set_init_counter(ext_counter(data, set), ext);
715 if (SET_WITH_COMMENT(set)) 816 if (SET_WITH_COMMENT(set))
716 ip_set_init_comment(ext_comment(data, set), ext); 817 ip_set_init_comment(ext_comment(data, set), ext);
717 if (SET_WITH_SKBINFO(set)) 818 if (SET_WITH_SKBINFO(set))
718 ip_set_init_skbinfo(ext_skbinfo(data, set), ext); 819 ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
820 /* Must come last for the case when timed out entry is reused */
821 if (SET_WITH_TIMEOUT(set))
822 ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
823 smp_mb__before_atomic();
824 set_bit(j, n->used);
825 if (old != ERR_PTR(-ENOENT)) {
826 rcu_assign_pointer(hbucket(t, key), n);
827 if (old)
828 kfree_rcu(old, rcu);
829 }
719 830
720out: 831 return 0;
721 rcu_read_unlock_bh(); 832set_full:
722 return ret; 833 if (net_ratelimit())
834 pr_warn("Set %s is full, maxelem %u reached\n",
835 set->name, h->maxelem);
836 return -IPSET_ERR_HASH_FULL;
723} 837}
724 838
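The rewritten mtype_add() above never grows a bucket in place. It allocates a copy that is AHASH_INIT_SIZE slots larger, fills the copy, publishes it with rcu_assign_pointer() and leaves the old bucket to kfree_rcu(), so lockless readers always see either the old bucket or a fully initialized replacement, never a half-grown one. What follows is a minimal standalone sketch of this copy-then-publish pattern; C11 atomics and an immediate free() stand in for the kernel's RCU primitives, and struct bucket, bucket_grow() and slot are illustrative names, not kernel symbols.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define INIT_SIZE 4	/* plays the role of AHASH_INIT_SIZE */

struct bucket {
	unsigned int size;	/* allocated slots */
	unsigned int pos;	/* first never-used slot */
	int value[];		/* flexible payload, like hbucket->value */
};

static _Atomic(struct bucket *) slot;	/* models one hbucket(t, key) */

/* Allocate a copy that is INIT_SIZE slots larger; the header-plus-payload
 * memcpy() mirrors what mtype_add() does before bumping n->size. */
static struct bucket *bucket_grow(const struct bucket *old)
{
	unsigned int size = old ? old->size + INIT_SIZE : INIT_SIZE;
	struct bucket *n = calloc(1, sizeof(*n) + size * sizeof(int));

	if (!n)
		return NULL;
	if (old)
		memcpy(n, old, sizeof(*old) + old->size * sizeof(int));
	n->size = size;
	return n;
}

int main(void)
{
	for (int v = 0; v < 10; v++) {
		struct bucket *old = atomic_load(&slot);

		if (old && old->pos < old->size) {
			old->value[old->pos++] = v;	/* free slot: add in place */
			continue;
		}
		struct bucket *n = bucket_grow(old);

		if (!n)
			return 1;
		n->value[n->pos++] = v;
		/* Publish the complete copy: rcu_assign_pointer() in the kernel. */
		atomic_store_explicit(&slot, n, memory_order_release);
		/* kfree_rcu() in the kernel; a plain free() is safe here only
		 * because this sketch has no concurrent readers. */
		free(old);
	}
	printf("stored %u values\n", atomic_load(&slot)->pos);
	return 0;
}

The release store is what guarantees that a reader doing an acquire load of the pointer also sees the copied payload behind it.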
725/* Delete an element from the hash: swap it with the last element 839/* Delete an element from the hash and free up space if possible.
726 * and free up space if possible.
727 */ 840 */
728static int 841static int
729mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, 842mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
@@ -734,55 +847,70 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
734 const struct mtype_elem *d = value; 847 const struct mtype_elem *d = value;
735 struct mtype_elem *data; 848 struct mtype_elem *data;
736 struct hbucket *n; 849 struct hbucket *n;
737 int i, ret = -IPSET_ERR_EXIST; 850 int i, j, k, ret = -IPSET_ERR_EXIST;
738#ifdef IP_SET_HASH_WITH_NETS
739 u8 j;
740#endif
741 u32 key, multi = 0; 851 u32 key, multi = 0;
852 size_t dsize = set->dsize;
742 853
743 rcu_read_lock_bh(); 854 t = ipset_dereference_protected(h->table, set);
744 t = rcu_dereference_bh(h->table);
745 key = HKEY(value, h->initval, t->htable_bits); 855 key = HKEY(value, h->initval, t->htable_bits);
746 n = hbucket(t, key); 856 n = __ipset_dereference_protected(hbucket(t, key), 1);
747 for (i = 0; i < n->pos; i++) { 857 if (!n)
748 data = ahash_data(n, i, set->dsize); 858 goto out;
859 for (i = 0, k = 0; i < n->pos; i++) {
860 if (!test_bit(i, n->used)) {
861 k++;
862 continue;
863 }
864 data = ahash_data(n, i, dsize);
749 if (!mtype_data_equal(data, d, &multi)) 865 if (!mtype_data_equal(data, d, &multi))
750 continue; 866 continue;
751 if (SET_WITH_TIMEOUT(set) && 867 if (SET_WITH_TIMEOUT(set) &&
752 ip_set_timeout_expired(ext_timeout(data, set))) 868 ip_set_timeout_expired(ext_timeout(data, set)))
753 goto out; 869 goto out;
754 if (i != n->pos - 1)
755 /* Not last one */
756 memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
757 set->dsize);
758 870
759 n->pos--; 871 ret = 0;
872 clear_bit(i, n->used);
873 smp_mb__after_atomic();
874 if (i + 1 == n->pos)
875 n->pos--;
760 h->elements--; 876 h->elements--;
761#ifdef IP_SET_HASH_WITH_NETS 877#ifdef IP_SET_HASH_WITH_NETS
762 for (j = 0; j < IPSET_NET_COUNT; j++) 878 for (j = 0; j < IPSET_NET_COUNT; j++)
763 mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family), 879 mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
764 j); 880 NLEN(set->family), j);
765#endif 881#endif
766 ip_set_ext_destroy(set, data); 882 ip_set_ext_destroy(set, data);
767 if (n->pos + AHASH_INIT_SIZE < n->size) { 883
768 void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) 884 for (; i < n->pos; i++) {
769 * set->dsize, 885 if (!test_bit(i, n->used))
770 GFP_ATOMIC); 886 k++;
771 if (!tmp) { 887 }
772 ret = 0; 888 if (n->pos == 0 && k == 0) {
889 rcu_assign_pointer(hbucket(t, key), NULL);
890 kfree_rcu(n, rcu);
891 } else if (k >= AHASH_INIT_SIZE) {
892 struct hbucket *tmp = kzalloc(sizeof(*tmp) +
893 (n->size - AHASH_INIT_SIZE) * dsize,
894 GFP_ATOMIC);
895 if (!tmp)
773 goto out; 896 goto out;
897 tmp->size = n->size - AHASH_INIT_SIZE;
898 for (j = 0, k = 0; j < n->pos; j++) {
899 if (!test_bit(j, n->used))
900 continue;
901 data = ahash_data(n, j, dsize);
902 memcpy(tmp->value + k * dsize, data, dsize);
 903 set_bit(k, tmp->used);
904 k++;
774 } 905 }
775 n->size -= AHASH_INIT_SIZE; 906 tmp->pos = k;
776 memcpy(tmp, n->value, n->size * set->dsize); 907 rcu_assign_pointer(hbucket(t, key), tmp);
777 kfree(n->value); 908 kfree_rcu(n, rcu);
778 n->value = tmp;
779 } 909 }
780 ret = 0;
781 goto out; 910 goto out;
782 } 911 }
783 912
784out: 913out:
785 rcu_read_unlock_bh();
786 return ret; 914 return ret;
787} 915}
788 916
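mtype_del() above no longer swaps the last element into the freed slot, which a lockless reader could observe as a duplicated or missed entry; it just clears the slot's bit in the used bitmap. Once at least AHASH_INIT_SIZE holes have accumulated, the live entries are copied densely into a smaller replacement bucket, and a bucket left entirely empty is unlinked and kfree_rcu()'d. A standalone sketch of the compaction step, with a plain unsigned long as the bitmap and all names illustrative, not kernel symbols:

#include <stdio.h>
#include <stdlib.h>

#define SLOTS 8

struct bucket {
	unsigned long used;	/* occupancy bitmap, like hbucket->used */
	unsigned int pos;	/* slots ever handed out */
	unsigned int size;
	int value[];
};

/* Copy the live entries of 'n' densely into a fresh, smaller bucket.
 * The occupancy bit must be set at the entry's position k in the *new*
 * bucket; that is what lets later scans bounded by tmp->pos = k see
 * every surviving entry. */
static struct bucket *compact(const struct bucket *n, unsigned int newsize)
{
	struct bucket *tmp = calloc(1, sizeof(*tmp) + newsize * sizeof(int));
	unsigned int j, k;

	if (!tmp)
		return NULL;
	tmp->size = newsize;
	for (j = 0, k = 0; j < n->pos; j++) {
		if (!(n->used & (1UL << j)))
			continue;		/* hole: skip it */
		tmp->value[k] = n->value[j];
		tmp->used |= 1UL << k;
		k++;
	}
	tmp->pos = k;
	return tmp;	/* caller publishes tmp, then defers freeing 'n' */
}

int main(void)
{
	struct bucket *n = calloc(1, sizeof(*n) + SLOTS * sizeof(int));
	struct bucket *tmp;

	if (!n)
		return 1;
	n->pos = n->size = SLOTS;
	for (unsigned int i = 0; i < SLOTS; i++) {
		n->value[i] = 10 + i;
		n->used |= 1UL << i;
	}
	n->used &= ~(1UL << 1);		/* "delete" two entries */
	n->used &= ~(1UL << 5);

	tmp = compact(n, SLOTS - 2);
	if (!tmp)
		return 1;
	for (unsigned int i = 0; i < tmp->pos; i++)
		printf("%d ", tmp->value[i]);	/* 10 12 13 14 16 17 */
	printf("\n");
	free(n);
	free(tmp);
	return 0;
}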
@@ -801,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
801 929
802#ifdef IP_SET_HASH_WITH_NETS 930#ifdef IP_SET_HASH_WITH_NETS
803/* Special test function which takes into account the different network 931/* Special test function which takes into account the different network
804 * sizes added to the set */ 932 * sizes added to the set
933 */
805static int 934static int
806mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, 935mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
807 const struct ip_set_ext *ext, 936 const struct ip_set_ext *ext,
@@ -824,16 +953,21 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
824 for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { 953 for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
825#if IPSET_NET_COUNT == 2 954#if IPSET_NET_COUNT == 2
826 mtype_data_reset_elem(d, &orig); 955 mtype_data_reset_elem(d, &orig);
827 mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false); 956 mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
828 for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; 957 for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
829 k++) { 958 k++) {
830 mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true); 959 mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
960 true);
831#else 961#else
832 mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0])); 962 mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
833#endif 963#endif
834 key = HKEY(d, h->initval, t->htable_bits); 964 key = HKEY(d, h->initval, t->htable_bits);
835 n = hbucket(t, key); 965 n = rcu_dereference_bh(hbucket(t, key));
966 if (!n)
967 continue;
836 for (i = 0; i < n->pos; i++) { 968 for (i = 0; i < n->pos; i++) {
969 if (!test_bit(i, n->used))
970 continue;
837 data = ahash_data(n, i, set->dsize); 971 data = ahash_data(n, i, set->dsize);
838 if (!mtype_data_equal(data, d, &multi)) 972 if (!mtype_data_equal(data, d, &multi))
839 continue; 973 continue;
@@ -871,13 +1005,13 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
871 int i, ret = 0; 1005 int i, ret = 0;
872 u32 key, multi = 0; 1006 u32 key, multi = 0;
873 1007
874 rcu_read_lock_bh();
875 t = rcu_dereference_bh(h->table); 1008 t = rcu_dereference_bh(h->table);
876#ifdef IP_SET_HASH_WITH_NETS 1009#ifdef IP_SET_HASH_WITH_NETS
877 /* If we test an IP address and not a network address, 1010 /* If we test an IP address and not a network address,
878 * try all possible network sizes */ 1011 * try all possible network sizes
1012 */
879 for (i = 0; i < IPSET_NET_COUNT; i++) 1013 for (i = 0; i < IPSET_NET_COUNT; i++)
880 if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family)) 1014 if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
881 break; 1015 break;
882 if (i == IPSET_NET_COUNT) { 1016 if (i == IPSET_NET_COUNT) {
883 ret = mtype_test_cidrs(set, d, ext, mext, flags); 1017 ret = mtype_test_cidrs(set, d, ext, mext, flags);
@@ -886,8 +1020,14 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
886#endif 1020#endif
887 1021
888 key = HKEY(d, h->initval, t->htable_bits); 1022 key = HKEY(d, h->initval, t->htable_bits);
889 n = hbucket(t, key); 1023 n = rcu_dereference_bh(hbucket(t, key));
1024 if (!n) {
1025 ret = 0;
1026 goto out;
1027 }
890 for (i = 0; i < n->pos; i++) { 1028 for (i = 0; i < n->pos; i++) {
1029 if (!test_bit(i, n->used))
1030 continue;
891 data = ahash_data(n, i, set->dsize); 1031 data = ahash_data(n, i, set->dsize);
892 if (mtype_data_equal(data, d, &multi) && 1032 if (mtype_data_equal(data, d, &multi) &&
893 !(SET_WITH_TIMEOUT(set) && 1033 !(SET_WITH_TIMEOUT(set) &&
@@ -897,7 +1037,6 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
897 } 1037 }
898 } 1038 }
899out: 1039out:
900 rcu_read_unlock_bh();
901 return ret; 1040 return ret;
902} 1041}
903 1042
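With the per-operation locking gone from the lookup path, mtype_test() runs entirely under the rcu_read_lock_bh() already held by the ip_set core: it dereferences the bucket pointer once, treats a NULL bucket as a plain miss, and skips slots whose used bit is clear. A self-contained model of that walk, where an acquire load stands in for rcu_dereference_bh() and all names are illustrative:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct bucket {
	unsigned long used;	/* occupancy bitmap, like hbucket->used */
	unsigned int pos;	/* slots ever handed out */
	int value[8];
};

static _Atomic(struct bucket *) slot;	/* models hbucket(t, key) */

static bool contains(int v)
{
	/* One acquire load pins a consistent bucket for the whole scan. */
	struct bucket *n = atomic_load_explicit(&slot, memory_order_acquire);

	if (!n)			/* empty chain: a miss, not an error */
		return false;
	for (unsigned int i = 0; i < n->pos; i++) {
		if (!(n->used & (1UL << i)))
			continue;	/* hole left by a deletion */
		if (n->value[i] == v)
			return true;
	}
	return false;
}

int main(void)
{
	static struct bucket b = { .used = 0x5, .pos = 3,
				   .value = { 1, 2, 3 } };

	atomic_store(&slot, &b);
	printf("%d %d %d\n", contains(1), contains(2), contains(3));
	/* prints "1 0 1": slot 1 is a hole, so 2 is not found */
	return 0;
}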
@@ -909,15 +1048,19 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
909 const struct htable *t; 1048 const struct htable *t;
910 struct nlattr *nested; 1049 struct nlattr *nested;
911 size_t memsize; 1050 size_t memsize;
1051 u8 htable_bits;
912 1052
1053 rcu_read_lock_bh();
913 t = rcu_dereference_bh_nfnl(h->table); 1054 t = rcu_dereference_bh_nfnl(h->table);
914 memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); 1055 memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
1056 htable_bits = t->htable_bits;
1057 rcu_read_unlock_bh();
915 1058
916 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 1059 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
917 if (!nested) 1060 if (!nested)
918 goto nla_put_failure; 1061 goto nla_put_failure;
919 if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, 1062 if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
920 htonl(jhash_size(t->htable_bits))) || 1063 htonl(jhash_size(htable_bits))) ||
921 nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) 1064 nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
922 goto nla_put_failure; 1065 goto nla_put_failure;
923#ifdef IP_SET_HASH_WITH_NETMASK 1066#ifdef IP_SET_HASH_WITH_NETMASK
@@ -941,32 +1084,63 @@ nla_put_failure:
941 return -EMSGSIZE; 1084 return -EMSGSIZE;
942} 1085}
943 1086
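mtype_head() now holds the RCU read lock only long enough to snapshot what it needs, the computed memsize and htable_bits, into locals, and builds the netlink reply from those stable copies after unlocking. A small sketch of the snapshot-then-format shape, with a pthread rwlock standing in for RCU purely for illustration and all names invented:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static struct { unsigned int bits; size_t memsize; } table = { 10, 4096 };

static void report(void)
{
	unsigned int bits;
	size_t memsize;

	pthread_rwlock_rdlock(&lock);	/* rcu_read_lock_bh() analog */
	bits = table.bits;		/* snapshot while protected */
	memsize = table.memsize;
	pthread_rwlock_unlock(&lock);	/* rcu_read_unlock_bh() analog */

	/* The slow reply-building work runs on the snapshot, not on the
	 * live table, which may be replaced the moment we unlock. */
	printf("hashsize %u, memsize %zu\n", 1U << bits, memsize);
}

int main(void)
{
	report();
	return 0;
}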
1087/* Make it possible to run dumping in parallel with resizing */
1088static void
1089mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
1090{
1091 struct htype *h = set->data;
1092 struct htable *t;
1093
1094 if (start) {
1095 rcu_read_lock_bh();
1096 t = rcu_dereference_bh_nfnl(h->table);
1097 atomic_inc(&t->uref);
1098 cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
1099 rcu_read_unlock_bh();
1100 } else if (cb->args[IPSET_CB_PRIVATE]) {
1101 t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
1102 if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
1103 /* Resizing didn't destroy the hash table */
1104 pr_debug("Table destroy by dump: %p\n", t);
1105 mtype_ahash_destroy(set, t, false);
1106 }
1107 cb->args[IPSET_CB_PRIVATE] = 0;
1108 }
1109}
1110
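mtype_uref() is the new hook that lets a netlink dump pin the exact table it started on: the dump takes a user reference (t->uref) when it starts, and the last dumper out frees the table if a resize has already swapped it out, which the kernel detects via t->ref. A compressed sketch of that handoff with C11 atomics; 'detached' is an illustrative stand-in for the atomic_read(&t->ref) test:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct table {
	atomic_int uref;	/* active dumpers, like htable->uref */
	atomic_bool detached;	/* resize already unlinked this table */
};

static void dump_start(struct table *t)
{
	atomic_fetch_add(&t->uref, 1);		/* atomic_inc(&t->uref) */
}

static void dump_done(struct table *t)
{
	/* atomic_dec_and_test(&t->uref) && atomic_read(&t->ref) */
	if (atomic_fetch_sub(&t->uref, 1) == 1 && atomic_load(&t->detached)) {
		printf("table destroyed by last dumper\n");
		free(t);
	}
}

int main(void)
{
	struct table *t = calloc(1, sizeof(*t));

	if (!t)
		return 1;
	dump_start(t);				/* mtype_uref(..., true) */
	atomic_store(&t->detached, true);	/* a resize swapped it out */
	dump_done(t);				/* mtype_uref(..., false) */
	return 0;
}

When no resize intervenes, the last dumper simply drops its reference and the table lives on; the destroy path is taken only for a table that a resize has already replaced.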
944/* Reply a LIST/SAVE request: dump the elements of the specified set */ 1111/* Reply a LIST/SAVE request: dump the elements of the specified set */
945static int 1112static int
946mtype_list(const struct ip_set *set, 1113mtype_list(const struct ip_set *set,
947 struct sk_buff *skb, struct netlink_callback *cb) 1114 struct sk_buff *skb, struct netlink_callback *cb)
948{ 1115{
949 const struct htype *h = set->data; 1116 const struct htable *t;
950 const struct htable *t = rcu_dereference_bh_nfnl(h->table);
951 struct nlattr *atd, *nested; 1117 struct nlattr *atd, *nested;
952 const struct hbucket *n; 1118 const struct hbucket *n;
953 const struct mtype_elem *e; 1119 const struct mtype_elem *e;
954 u32 first = cb->args[IPSET_CB_ARG0]; 1120 u32 first = cb->args[IPSET_CB_ARG0];
955 /* We assume that one hash bucket fills into one page */ 1121 /* We assume that one hash bucket fills into one page */
956 void *incomplete; 1122 void *incomplete;
957 int i; 1123 int i, ret = 0;
958 1124
959 atd = ipset_nest_start(skb, IPSET_ATTR_ADT); 1125 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
960 if (!atd) 1126 if (!atd)
961 return -EMSGSIZE; 1127 return -EMSGSIZE;
1128
962 pr_debug("list hash set %s\n", set->name); 1129 pr_debug("list hash set %s\n", set->name);
1130 t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
1131 /* Expire may replace a hbucket with another one */
1132 rcu_read_lock();
963 for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); 1133 for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
964 cb->args[IPSET_CB_ARG0]++) { 1134 cb->args[IPSET_CB_ARG0]++) {
965 incomplete = skb_tail_pointer(skb); 1135 incomplete = skb_tail_pointer(skb);
966 n = hbucket(t, cb->args[IPSET_CB_ARG0]); 1136 n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
967 pr_debug("cb->arg bucket: %lu, t %p n %p\n", 1137 pr_debug("cb->arg bucket: %lu, t %p n %p\n",
968 cb->args[IPSET_CB_ARG0], t, n); 1138 cb->args[IPSET_CB_ARG0], t, n);
1139 if (!n)
1140 continue;
969 for (i = 0; i < n->pos; i++) { 1141 for (i = 0; i < n->pos; i++) {
1142 if (!test_bit(i, n->used))
1143 continue;
970 e = ahash_data(n, i, set->dsize); 1144 e = ahash_data(n, i, set->dsize);
971 if (SET_WITH_TIMEOUT(set) && 1145 if (SET_WITH_TIMEOUT(set) &&
972 ip_set_timeout_expired(ext_timeout(e, set))) 1146 ip_set_timeout_expired(ext_timeout(e, set)))
@@ -977,9 +1151,10 @@ mtype_list(const struct ip_set *set,
977 if (!nested) { 1151 if (!nested) {
978 if (cb->args[IPSET_CB_ARG0] == first) { 1152 if (cb->args[IPSET_CB_ARG0] == first) {
979 nla_nest_cancel(skb, atd); 1153 nla_nest_cancel(skb, atd);
980 return -EMSGSIZE; 1154 ret = -EMSGSIZE;
981 } else 1155 goto out;
982 goto nla_put_failure; 1156 }
1157 goto nla_put_failure;
983 } 1158 }
984 if (mtype_data_list(skb, e)) 1159 if (mtype_data_list(skb, e))
985 goto nla_put_failure; 1160 goto nla_put_failure;
@@ -992,7 +1167,7 @@ mtype_list(const struct ip_set *set,
992 /* Set listing finished */ 1167 /* Set listing finished */
993 cb->args[IPSET_CB_ARG0] = 0; 1168 cb->args[IPSET_CB_ARG0] = 0;
994 1169
995 return 0; 1170 goto out;
996 1171
997nla_put_failure: 1172nla_put_failure:
998 nlmsg_trim(skb, incomplete); 1173 nlmsg_trim(skb, incomplete);
@@ -1000,20 +1175,24 @@ nla_put_failure:
1000 pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n", 1175 pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
1001 set->name); 1176 set->name);
1002 cb->args[IPSET_CB_ARG0] = 0; 1177 cb->args[IPSET_CB_ARG0] = 0;
1003 return -EMSGSIZE; 1178 ret = -EMSGSIZE;
1179 } else {
1180 ipset_nest_end(skb, atd);
1004 } 1181 }
1005 ipset_nest_end(skb, atd); 1182out:
1006 return 0; 1183 rcu_read_unlock();
1184 return ret;
1007} 1185}
1008 1186
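mtype_list() keeps its position in cb->args[IPSET_CB_ARG0], so a dump that fills one message resumes at the same bucket on the next callback, and on overflow it trims the message back to the last complete element. A sketch of that resumable-cursor shape against a fixed-size buffer; the data and names are invented for the example:

#include <stdio.h>
#include <string.h>

#define NBUCKETS 8

static const char *bucket[NBUCKETS] = {
	"10.0.0.1", "10.0.0.2", "10.0.0.3", "10.0.0.4",
	"10.0.0.5", "10.0.0.6", "10.0.0.7", "10.0.0.8",
};

/* Fill 'buf' with whole entries only; *cursor advances only past
 * buckets emitted completely, playing the role of cb->args[]. */
static size_t dump(char *buf, size_t len, unsigned int *cursor)
{
	size_t used = 0;

	for (; *cursor < NBUCKETS; (*cursor)++) {
		size_t need = strlen(bucket[*cursor]) + 1;

		/* Committing only whole entries here is the analog of
		 * nlmsg_trim() back to 'incomplete'. An entry larger than
		 * 'len' would stall the cursor: the kernel pr_warn()s about
		 * exactly that case. */
		if (used + need > len)
			return used;	/* buffer full: resume here */
		memcpy(buf + used, bucket[*cursor], need - 1);
		buf[used + need - 1] = ' ';
		used += need;
	}
	return used;	/* cursor == NBUCKETS: listing finished */
}

int main(void)
{
	char buf[32];
	unsigned int cursor = 0;

	while (cursor < NBUCKETS) {
		size_t n = dump(buf, sizeof(buf), &cursor);

		printf("msg: %.*s\n", (int)n, buf);
	}
	return 0;
}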
1009static int 1187static int
1010IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, 1188IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
1011 const struct xt_action_param *par, 1189 const struct xt_action_param *par,
1012 enum ipset_adt adt, struct ip_set_adt_opt *opt); 1190 enum ipset_adt adt, struct ip_set_adt_opt *opt);
1013 1191
1014static int 1192static int
1015IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], 1193IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
1016 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); 1194 enum ipset_adt adt, u32 *lineno, u32 flags,
1195 bool retried);
1017 1196
1018static const struct ip_set_type_variant mtype_variant = { 1197static const struct ip_set_type_variant mtype_variant = {
1019 .kadt = mtype_kadt, 1198 .kadt = mtype_kadt,
@@ -1027,6 +1206,7 @@ static const struct ip_set_type_variant mtype_variant = {
1027 .flush = mtype_flush, 1206 .flush = mtype_flush,
1028 .head = mtype_head, 1207 .head = mtype_head,
1029 .list = mtype_list, 1208 .list = mtype_list,
1209 .uref = mtype_uref,
1030 .resize = mtype_resize, 1210 .resize = mtype_resize,
1031 .same_set = mtype_same_set, 1211 .same_set = mtype_same_set,
1032}; 1212};
@@ -1045,7 +1225,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1045 u8 netmask; 1225 u8 netmask;
1046#endif 1226#endif
1047 size_t hsize; 1227 size_t hsize;
1048 struct HTYPE *h; 1228 struct htype *h;
1049 struct htable *t; 1229 struct htable *t;
1050 1230
1051#ifndef IP_SET_PROTO_UNDEF 1231#ifndef IP_SET_PROTO_UNDEF
@@ -1064,12 +1244,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1064 1244
1065 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || 1245 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
1066 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || 1246 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
1067#ifdef IP_SET_HASH_WITH_MARKMASK
1068 !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
1069#endif
1070 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 1247 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
1071 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 1248 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
1072 return -IPSET_ERR_PROTOCOL; 1249 return -IPSET_ERR_PROTOCOL;
1250#ifdef IP_SET_HASH_WITH_MARKMASK
1251 /* Separated condition in order to avoid directive in argument list */
1252 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
1253 return -IPSET_ERR_PROTOCOL;
1254#endif
1073 1255
1074 if (tb[IPSET_ATTR_HASHSIZE]) { 1256 if (tb[IPSET_ATTR_HASHSIZE]) {
1075 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); 1257 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
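The comment in the hunk above explains why the markmask test was hoisted out of the big unlikely() condition: placing #ifdef/#endif inside a macro's argument list is undefined behavior (C11 6.10.3p11). A tiny runnable illustration of the safe shape; WITH_MARKMASK and the check_* helpers are made up for the example:

#include <stdio.h>

#define WITH_MARKMASK
#define unlikely(x) __builtin_expect(!!(x), 0)

static int check_a(int v) { return v & 1; }
static int check_b(int v) { return v & 2; }
#ifdef WITH_MARKMASK
static int check_mark(int v) { return v & 4; }
#endif

static int validate(int v)
{
	/* Never put #ifdef/#endif inside unlikely(...)'s argument list;
	 * keep the conditional part as its own fully guarded statement. */
	if (unlikely(!check_a(v) || !check_b(v)))
		return -1;
#ifdef WITH_MARKMASK
	if (unlikely(!check_mark(v)))
		return -1;
#endif
	return 0;
}

int main(void)
{
	printf("%d %d\n", validate(7), validate(3));	/* prints "0 -1" */
	return 0;
}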
@@ -1092,7 +1274,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1092#endif 1274#endif
1093#ifdef IP_SET_HASH_WITH_MARKMASK 1275#ifdef IP_SET_HASH_WITH_MARKMASK
1094 if (tb[IPSET_ATTR_MARKMASK]) { 1276 if (tb[IPSET_ATTR_MARKMASK]) {
1095 markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK])); 1277 markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
1096 1278
1097 if (markmask == 0) 1279 if (markmask == 0)
1098 return -IPSET_ERR_INVALID_MARKMASK; 1280 return -IPSET_ERR_INVALID_MARKMASK;
@@ -1165,3 +1347,5 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1165 return 0; 1347 return 0;
1166} 1348}
1167#endif /* IP_SET_EMIT_CREATE */ 1349#endif /* IP_SET_EMIT_CREATE */
1350
1351#undef HKEY_DATALEN
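ip_set_hash_gen.h is a template header: each hash type defines MTYPE and HOST_MASK, includes it to stamp out a full implementation, then #undefs both and repeats for the IPv6 variant, which is why the patch can drop the now-redundant PF define. A toy version of the pattern, where TOKEN() plays the role of the kernel's IPSET_TOKEN() paster and demo4/demo6 are invented names:

#include <stdio.h>

#define TOKEN2(a, b) a##b
#define TOKEN(a, b) TOKEN2(a, b)

/* ---- first "inclusion" of the template, IPv4 flavor ---- */
#define MTYPE demo4
#define HOST_MASK 32
static unsigned int TOKEN(MTYPE, _mask)(void) { return HOST_MASK; }
#undef MTYPE
#undef HOST_MASK

/* ---- second "inclusion", IPv6 flavor ---- */
#define MTYPE demo6
#define HOST_MASK 128
static unsigned int TOKEN(MTYPE, _mask)(void) { return HOST_MASK; }
#undef MTYPE
#undef HOST_MASK

int main(void)
{
	/* Token pasting produced two distinct functions from one body. */
	printf("%u %u\n", demo4_mask(), demo6_mask());	/* 32 128 */
	return 0;
}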
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 76959d79e9d1..9d6bf19f7b78 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -56,15 +56,15 @@ hash_ip4_data_equal(const struct hash_ip4_elem *e1,
56 return e1->ip == e2->ip; 56 return e1->ip == e2->ip;
57} 57}
58 58
59static inline bool 59static bool
60hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e) 60hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
61{ 61{
62 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip)) 62 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
63 goto nla_put_failure; 63 goto nla_put_failure;
64 return 0; 64 return false;
65 65
66nla_put_failure: 66nla_put_failure:
67 return 1; 67 return true;
68} 68}
69 69
70static inline void 70static inline void
@@ -74,7 +74,6 @@ hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
74} 74}
75 75
76#define MTYPE hash_ip4 76#define MTYPE hash_ip4
77#define PF 4
78#define HOST_MASK 32 77#define HOST_MASK 32
79#include "ip_set_hash_gen.h" 78#include "ip_set_hash_gen.h"
80 79
@@ -109,20 +108,17 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
109 u32 ip = 0, ip_to = 0, hosts; 108 u32 ip = 0, ip_to = 0, hosts;
110 int ret = 0; 109 int ret = 0;
111 110
112 if (unlikely(!tb[IPSET_ATTR_IP] ||
113 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
114 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
115 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
116 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
117 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
118 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
119 return -IPSET_ERR_PROTOCOL;
120
121 if (tb[IPSET_ATTR_LINENO]) 111 if (tb[IPSET_ATTR_LINENO])
122 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 112 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
123 113
124 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || 114 if (unlikely(!tb[IPSET_ATTR_IP]))
125 ip_set_get_extensions(set, tb, &ext); 115 return -IPSET_ERR_PROTOCOL;
116
117 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
118 if (ret)
119 return ret;
120
121 ret = ip_set_get_extensions(set, tb, &ext);
126 if (ret) 122 if (ret)
127 return ret; 123 return ret;
128 124
@@ -145,7 +141,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
145 } else if (tb[IPSET_ATTR_CIDR]) { 141 } else if (tb[IPSET_ATTR_CIDR]) {
146 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 142 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
147 143
148 if (!cidr || cidr > 32) 144 if (!cidr || cidr > HOST_MASK)
149 return -IPSET_ERR_INVALID_CIDR; 145 return -IPSET_ERR_INVALID_CIDR;
150 ip_set_mask_from_to(ip, ip_to, cidr); 146 ip_set_mask_from_to(ip, ip_to, cidr);
151 } 147 }
@@ -162,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
162 158
163 if (ret && !ip_set_eexist(ret, flags)) 159 if (ret && !ip_set_eexist(ret, flags))
164 return ret; 160 return ret;
165 else 161
166 ret = 0; 162 ret = 0;
167 } 163 }
168 return ret; 164 return ret;
169} 165}
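The uadt rework above also fixes a subtle error-path bug: the old ret = parse_a(...) || parse_b(...) shape collapses any failure into the boolean 1, so "if (ret) return ret;" hands the caller a positive 1 instead of the real negative error code. A short demonstration; parse_addr/parse_ext are stand-ins for ip_set_get_hostipaddr4()/ip_set_get_extensions():

#include <stdio.h>

#define EINVAL_DEMO 22	/* illustrative stand-in for EINVAL */

static int parse_addr(int ok) { return ok ? 0 : -EINVAL_DEMO; }
static int parse_ext(int ok)  { return ok ? 0 : -EINVAL_DEMO; }

int main(void)
{
	/* Old shape: the error value is laundered into boolean 1. */
	int ret = parse_addr(0) || parse_ext(1);

	printf("chained: %d\n", ret);		/* prints 1, -22 is lost */

	/* New shape: each call is checked and its code preserved. */
	ret = parse_addr(0);
	if (!ret)
		ret = parse_ext(1);
	printf("sequential: %d\n", ret);	/* prints -22 */
	return 0;
}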
@@ -196,10 +192,10 @@ hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
196{ 192{
197 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6)) 193 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))
198 goto nla_put_failure; 194 goto nla_put_failure;
199 return 0; 195 return false;
200 196
201nla_put_failure: 197nla_put_failure:
202 return 1; 198 return true;
203} 199}
204 200
205static inline void 201static inline void
@@ -208,12 +204,9 @@ hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
208} 204}
209 205
210#undef MTYPE 206#undef MTYPE
211#undef PF
212#undef HOST_MASK 207#undef HOST_MASK
213#undef HKEY_DATALEN
214 208
215#define MTYPE hash_ip6 209#define MTYPE hash_ip6
216#define PF 6
217#define HOST_MASK 128 210#define HOST_MASK 128
218 211
219#define IP_SET_EMIT_CREATE 212#define IP_SET_EMIT_CREATE
@@ -247,22 +240,25 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
247 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 240 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
248 int ret; 241 int ret;
249 242
250 if (unlikely(!tb[IPSET_ATTR_IP] ||
251 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
252 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
253 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
254 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
255 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
256 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
257 tb[IPSET_ATTR_IP_TO] ||
258 tb[IPSET_ATTR_CIDR]))
259 return -IPSET_ERR_PROTOCOL;
260
261 if (tb[IPSET_ATTR_LINENO]) 243 if (tb[IPSET_ATTR_LINENO])
262 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 244 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
263 245
264 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || 246 if (unlikely(!tb[IPSET_ATTR_IP]))
265 ip_set_get_extensions(set, tb, &ext); 247 return -IPSET_ERR_PROTOCOL;
248 if (unlikely(tb[IPSET_ATTR_IP_TO]))
249 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
250 if (unlikely(tb[IPSET_ATTR_CIDR])) {
251 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
252
253 if (cidr != HOST_MASK)
254 return -IPSET_ERR_INVALID_CIDR;
255 }
256
257 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
258 if (ret)
259 return ret;
260
261 ret = ip_set_get_extensions(set, tb, &ext);
266 if (ret) 262 if (ret)
267 return ret; 263 return ret;
268 264
@@ -301,7 +297,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
301 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 297 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
302 [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, 298 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
303 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, 299 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
304 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, 300 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
301 .len = IPSET_MAX_COMMENT_SIZE },
305 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, 302 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
306 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, 303 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
307 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, 304 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -318,6 +315,7 @@ hash_ip_init(void)
318static void __exit 315static void __exit
319hash_ip_fini(void) 316hash_ip_fini(void)
320{ 317{
318 rcu_barrier();
321 ip_set_type_unregister(&hash_ip_type); 319 ip_set_type_unregister(&hash_ip_type);
322} 320}
323 321
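Every module exit routine in this series gains an rcu_barrier() before unregistering the set type: add/del may still have kfree_rcu() callbacks queued, and those must finish before the module that owns them is unloaded. A userspace model of the same drain requirement, with a worker thread playing the RCU callback context; the structure is invented for illustration:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static void *pending;		/* at most one deferred free, for brevity */

static void *worker(void *arg)
{
	pthread_mutex_lock(&mu);
	if (pending) {		/* the "grace period elapsed" callback */
		free(pending);
		pending = NULL;
		pthread_cond_signal(&cv);
	}
	pthread_mutex_unlock(&mu);
	return NULL;
}

static void defer_free(void *p)	/* kfree_rcu() analog */
{
	pthread_mutex_lock(&mu);
	pending = p;
	pthread_mutex_unlock(&mu);
}

static void drain(void)		/* rcu_barrier() analog */
{
	pthread_mutex_lock(&mu);
	while (pending)
		pthread_cond_wait(&cv, &mu);
	pthread_mutex_unlock(&mu);
}

int main(void)
{
	pthread_t th;

	defer_free(malloc(64));
	pthread_create(&th, NULL, worker, NULL);
	drain();		/* only now is teardown safe */
	pthread_join(th, NULL);
	printf("all deferred frees completed before unload\n");
	return 0;
}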
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 7abf9788cfa8..a0695a2ab585 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -63,10 +63,10 @@ hash_ipmark4_data_list(struct sk_buff *skb,
63 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || 63 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
64 nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) 64 nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
65 goto nla_put_failure; 65 goto nla_put_failure;
66 return 0; 66 return false;
67 67
68nla_put_failure: 68nla_put_failure:
69 return 1; 69 return true;
70} 70}
71 71
72static inline void 72static inline void
@@ -76,10 +76,8 @@ hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
76 next->ip = d->ip; 76 next->ip = d->ip;
77} 77}
78 78
79#define MTYPE hash_ipmark4 79#define MTYPE hash_ipmark4
80#define PF 4 80#define HOST_MASK 32
81#define HOST_MASK 32
82#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
83#include "ip_set_hash_gen.h" 81#include "ip_set_hash_gen.h"
84 82
85static int 83static int
@@ -110,25 +108,22 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
110 u32 ip, ip_to = 0; 108 u32 ip, ip_to = 0;
111 int ret; 109 int ret;
112 110
111 if (tb[IPSET_ATTR_LINENO])
112 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
113
113 if (unlikely(!tb[IPSET_ATTR_IP] || 114 if (unlikely(!tb[IPSET_ATTR_IP] ||
114 !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || 115 !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
115 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
116 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
117 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
118 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
119 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
120 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
121 return -IPSET_ERR_PROTOCOL; 116 return -IPSET_ERR_PROTOCOL;
122 117
123 if (tb[IPSET_ATTR_LINENO]) 118 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
124 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 119 if (ret)
120 return ret;
125 121
126 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || 122 ret = ip_set_get_extensions(set, tb, &ext);
127 ip_set_get_extensions(set, tb, &ext);
128 if (ret) 123 if (ret)
129 return ret; 124 return ret;
130 125
131 e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); 126 e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
132 e.mark &= h->markmask; 127 e.mark &= h->markmask;
133 128
134 if (adt == IPSET_TEST || 129 if (adt == IPSET_TEST ||
@@ -147,7 +142,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
147 } else if (tb[IPSET_ATTR_CIDR]) { 142 } else if (tb[IPSET_ATTR_CIDR]) {
148 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 143 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
149 144
150 if (!cidr || cidr > 32) 145 if (!cidr || cidr > HOST_MASK)
151 return -IPSET_ERR_INVALID_CIDR; 146 return -IPSET_ERR_INVALID_CIDR;
152 ip_set_mask_from_to(ip, ip_to, cidr); 147 ip_set_mask_from_to(ip, ip_to, cidr);
153 } 148 }
@@ -160,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
160 155
161 if (ret && !ip_set_eexist(ret, flags)) 156 if (ret && !ip_set_eexist(ret, flags))
162 return ret; 157 return ret;
163 else 158
164 ret = 0; 159 ret = 0;
165 } 160 }
166 return ret; 161 return ret;
167} 162}
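The switch from nla_get_u32() to nla_get_be32() for IPSET_ATTR_MARK reads the same bytes; it corrects the type annotation, since the attribute travels in network byte order (it is emitted with nla_put_net32() above) and is converted exactly once with ntohl(). Static checkers such as sparse can then verify the __be32 handling. A portable equivalent of that one conversion; get_be32() here is a local helper, not the kernel accessor:

#include <stdint.h>
#include <stdio.h>

/* Read a big-endian u32 from the wire into host order, ntohl-style. */
static uint32_t get_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

int main(void)
{
	const uint8_t attr_payload[4] = { 0x00, 0x00, 0x00, 0x2a };
	uint32_t mark = get_be32(attr_payload);	/* ntohl(nla_get_be32()) */

	printf("mark %u\n", mark);	/* prints 42 on every host */
	return 0;
}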
@@ -191,10 +186,10 @@ hash_ipmark6_data_list(struct sk_buff *skb,
191 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || 186 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
192 nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) 187 nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
193 goto nla_put_failure; 188 goto nla_put_failure;
194 return 0; 189 return false;
195 190
196nla_put_failure: 191nla_put_failure:
197 return 1; 192 return true;
198} 193}
199 194
200static inline void 195static inline void
@@ -204,18 +199,13 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
204} 199}
205 200
206#undef MTYPE 201#undef MTYPE
207#undef PF
208#undef HOST_MASK 202#undef HOST_MASK
209#undef HKEY_DATALEN
210 203
211#define MTYPE hash_ipmark6 204#define MTYPE hash_ipmark6
212#define PF 6
213#define HOST_MASK 128 205#define HOST_MASK 128
214#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem) 206#define IP_SET_EMIT_CREATE
215#define IP_SET_EMIT_CREATE
216#include "ip_set_hash_gen.h" 207#include "ip_set_hash_gen.h"
217 208
218
219static int 209static int
220hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, 210hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
221 const struct xt_action_param *par, 211 const struct xt_action_param *par,
@@ -243,27 +233,30 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
243 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 233 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
244 int ret; 234 int ret;
245 235
236 if (tb[IPSET_ATTR_LINENO])
237 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
238
246 if (unlikely(!tb[IPSET_ATTR_IP] || 239 if (unlikely(!tb[IPSET_ATTR_IP] ||
247 !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || 240 !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
248 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
249 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
250 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
251 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
252 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
253 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
254 tb[IPSET_ATTR_IP_TO] ||
255 tb[IPSET_ATTR_CIDR]))
256 return -IPSET_ERR_PROTOCOL; 241 return -IPSET_ERR_PROTOCOL;
242 if (unlikely(tb[IPSET_ATTR_IP_TO]))
243 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
244 if (unlikely(tb[IPSET_ATTR_CIDR])) {
245 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
257 246
258 if (tb[IPSET_ATTR_LINENO]) 247 if (cidr != HOST_MASK)
259 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 248 return -IPSET_ERR_INVALID_CIDR;
249 }
260 250
261 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || 251 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
262 ip_set_get_extensions(set, tb, &ext);
263 if (ret) 252 if (ret)
264 return ret; 253 return ret;
265 254
266 e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); 255 ret = ip_set_get_extensions(set, tb, &ext);
256 if (ret)
257 return ret;
258
259 e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
267 e.mark &= h->markmask; 260 e.mark &= h->markmask;
268 261
269 if (adt == IPSET_TEST) { 262 if (adt == IPSET_TEST) {
@@ -274,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
274 ret = adtfn(set, &e, &ext, &ext, flags); 267 ret = adtfn(set, &e, &ext, &ext, flags);
275 if (ret && !ip_set_eexist(ret, flags)) 268 if (ret && !ip_set_eexist(ret, flags))
276 return ret; 269 return ret;
277 else
278 ret = 0;
279 270
280 return ret; 271 return 0;
281} 272}
282 273
283static struct ip_set_type hash_ipmark_type __read_mostly = { 274static struct ip_set_type hash_ipmark_type __read_mostly = {
@@ -307,7 +298,8 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
307 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 298 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
308 [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, 299 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
309 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, 300 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
310 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, 301 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
302 .len = IPSET_MAX_COMMENT_SIZE },
311 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, 303 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
312 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, 304 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
313 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, 305 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -324,6 +316,7 @@ hash_ipmark_init(void)
324static void __exit 316static void __exit
325hash_ipmark_fini(void) 317hash_ipmark_fini(void)
326{ 318{
319 rcu_barrier();
327 ip_set_type_unregister(&hash_ipmark_type); 320 ip_set_type_unregister(&hash_ipmark_type);
328} 321}
329 322
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index dcbcceb9a52f..9d84b3dff603 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -69,10 +69,10 @@ hash_ipport4_data_list(struct sk_buff *skb,
69 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || 69 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
70 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) 70 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
71 goto nla_put_failure; 71 goto nla_put_failure;
72 return 0; 72 return false;
73 73
74nla_put_failure: 74nla_put_failure:
75 return 1; 75 return true;
76} 76}
77 77
78static inline void 78static inline void
@@ -83,10 +83,8 @@ hash_ipport4_data_next(struct hash_ipport4_elem *next,
83 next->port = d->port; 83 next->port = d->port;
84} 84}
85 85
86#define MTYPE hash_ipport4 86#define MTYPE hash_ipport4
87#define PF 4 87#define HOST_MASK 32
88#define HOST_MASK 32
89#define HKEY_DATALEN sizeof(struct hash_ipport4_elem)
90#include "ip_set_hash_gen.h" 88#include "ip_set_hash_gen.h"
91 89
92static int 90static int
@@ -118,29 +116,23 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
118 bool with_ports = false; 116 bool with_ports = false;
119 int ret; 117 int ret;
120 118
119 if (tb[IPSET_ATTR_LINENO])
120 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
121
121 if (unlikely(!tb[IPSET_ATTR_IP] || 122 if (unlikely(!tb[IPSET_ATTR_IP] ||
122 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 123 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
123 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 124 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
124 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
125 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
126 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
127 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
128 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
129 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
130 return -IPSET_ERR_PROTOCOL; 125 return -IPSET_ERR_PROTOCOL;
131 126
132 if (tb[IPSET_ATTR_LINENO]) 127 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
133 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 128 if (ret)
129 return ret;
134 130
135 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || 131 ret = ip_set_get_extensions(set, tb, &ext);
136 ip_set_get_extensions(set, tb, &ext);
137 if (ret) 132 if (ret)
138 return ret; 133 return ret;
139 134
140 if (tb[IPSET_ATTR_PORT]) 135 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
141 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
142 else
143 return -IPSET_ERR_PROTOCOL;
144 136
145 if (tb[IPSET_ATTR_PROTO]) { 137 if (tb[IPSET_ATTR_PROTO]) {
146 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 138 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -148,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
148 140
149 if (e.proto == 0) 141 if (e.proto == 0)
150 return -IPSET_ERR_INVALID_PROTO; 142 return -IPSET_ERR_INVALID_PROTO;
151 } else 143 } else {
152 return -IPSET_ERR_MISSING_PROTO; 144 return -IPSET_ERR_MISSING_PROTO;
145 }
153 146
154 if (!(with_ports || e.proto == IPPROTO_ICMP)) 147 if (!(with_ports || e.proto == IPPROTO_ICMP))
155 e.port = 0; 148 e.port = 0;
@@ -171,7 +164,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
171 } else if (tb[IPSET_ATTR_CIDR]) { 164 } else if (tb[IPSET_ATTR_CIDR]) {
172 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 165 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
173 166
174 if (!cidr || cidr > 32) 167 if (!cidr || cidr > HOST_MASK)
175 return -IPSET_ERR_INVALID_CIDR; 168 return -IPSET_ERR_INVALID_CIDR;
176 ip_set_mask_from_to(ip, ip_to, cidr); 169 ip_set_mask_from_to(ip, ip_to, cidr);
177 } 170 }
@@ -195,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
195 188
196 if (ret && !ip_set_eexist(ret, flags)) 189 if (ret && !ip_set_eexist(ret, flags))
197 return ret; 190 return ret;
198 else 191
199 ret = 0; 192 ret = 0;
200 } 193 }
201 } 194 }
202 return ret; 195 return ret;
@@ -231,10 +224,10 @@ hash_ipport6_data_list(struct sk_buff *skb,
231 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || 224 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
232 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) 225 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
233 goto nla_put_failure; 226 goto nla_put_failure;
234 return 0; 227 return false;
235 228
236nla_put_failure: 229nla_put_failure:
237 return 1; 230 return true;
238} 231}
239 232
240static inline void 233static inline void
@@ -245,15 +238,11 @@ hash_ipport6_data_next(struct hash_ipport4_elem *next,
245} 238}
246 239
247#undef MTYPE 240#undef MTYPE
248#undef PF
249#undef HOST_MASK 241#undef HOST_MASK
250#undef HKEY_DATALEN
251 242
252#define MTYPE hash_ipport6 243#define MTYPE hash_ipport6
253#define PF 6
254#define HOST_MASK 128 244#define HOST_MASK 128
255#define HKEY_DATALEN sizeof(struct hash_ipport6_elem) 245#define IP_SET_EMIT_CREATE
256#define IP_SET_EMIT_CREATE
257#include "ip_set_hash_gen.h" 246#include "ip_set_hash_gen.h"
258 247
259static int 248static int
@@ -285,31 +274,31 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
285 bool with_ports = false; 274 bool with_ports = false;
286 int ret; 275 int ret;
287 276
277 if (tb[IPSET_ATTR_LINENO])
278 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
279
288 if (unlikely(!tb[IPSET_ATTR_IP] || 280 if (unlikely(!tb[IPSET_ATTR_IP] ||
289 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 281 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
290 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 282 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
291 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
292 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
293 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
294 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
295 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
296 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
297 tb[IPSET_ATTR_IP_TO] ||
298 tb[IPSET_ATTR_CIDR]))
299 return -IPSET_ERR_PROTOCOL; 283 return -IPSET_ERR_PROTOCOL;
284 if (unlikely(tb[IPSET_ATTR_IP_TO]))
285 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
286 if (unlikely(tb[IPSET_ATTR_CIDR])) {
287 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
300 288
301 if (tb[IPSET_ATTR_LINENO]) 289 if (cidr != HOST_MASK)
302 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 290 return -IPSET_ERR_INVALID_CIDR;
291 }
303 292
304 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || 293 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
305 ip_set_get_extensions(set, tb, &ext);
306 if (ret) 294 if (ret)
307 return ret; 295 return ret;
308 296
309 if (tb[IPSET_ATTR_PORT]) 297 ret = ip_set_get_extensions(set, tb, &ext);
310 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); 298 if (ret)
311 else 299 return ret;
312 return -IPSET_ERR_PROTOCOL; 300
301 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
313 302
314 if (tb[IPSET_ATTR_PROTO]) { 303 if (tb[IPSET_ATTR_PROTO]) {
315 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 304 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -317,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
317 306
318 if (e.proto == 0) 307 if (e.proto == 0)
319 return -IPSET_ERR_INVALID_PROTO; 308 return -IPSET_ERR_INVALID_PROTO;
320 } else 309 } else {
321 return -IPSET_ERR_MISSING_PROTO; 310 return -IPSET_ERR_MISSING_PROTO;
311 }
322 312
323 if (!(with_ports || e.proto == IPPROTO_ICMPV6)) 313 if (!(with_ports || e.proto == IPPROTO_ICMPV6))
324 e.port = 0; 314 e.port = 0;
@@ -341,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
341 331
342 if (ret && !ip_set_eexist(ret, flags)) 332 if (ret && !ip_set_eexist(ret, flags))
343 return ret; 333 return ret;
344 else 334
345 ret = 0; 335 ret = 0;
346 } 336 }
347 return ret; 337 return ret;
348} 338}
@@ -376,7 +366,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
376 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 366 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
377 [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, 367 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
378 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, 368 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
379 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, 369 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
370 .len = IPSET_MAX_COMMENT_SIZE },
380 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, 371 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
381 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, 372 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
382 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, 373 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -393,6 +384,7 @@ hash_ipport_init(void)
393static void __exit 384static void __exit
394hash_ipport_fini(void) 385hash_ipport_fini(void)
395{ 386{
387 rcu_barrier();
396 ip_set_type_unregister(&hash_ipport_type); 388 ip_set_type_unregister(&hash_ipport_type);
397} 389}
398 390
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 7ef93fc887a1..215b7b942038 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -63,17 +63,17 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
63 63
64static bool 64static bool
65hash_ipportip4_data_list(struct sk_buff *skb, 65hash_ipportip4_data_list(struct sk_buff *skb,
66 const struct hash_ipportip4_elem *data) 66 const struct hash_ipportip4_elem *data)
67{ 67{
68 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || 68 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
69 nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || 69 nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
70 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || 70 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
71 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) 71 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
72 goto nla_put_failure; 72 goto nla_put_failure;
73 return 0; 73 return false;
74 74
75nla_put_failure: 75nla_put_failure:
76 return 1; 76 return true;
77} 77}
78 78
79static inline void 79static inline void
@@ -86,7 +86,6 @@ hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
86 86
87/* Common functions */ 87/* Common functions */
88#define MTYPE hash_ipportip4 88#define MTYPE hash_ipportip4
89#define PF 4
90#define HOST_MASK 32 89#define HOST_MASK 32
91#include "ip_set_hash_gen.h" 90#include "ip_set_hash_gen.h"
92 91
@@ -120,22 +119,19 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
120 bool with_ports = false; 119 bool with_ports = false;
121 int ret; 120 int ret;
122 121
122 if (tb[IPSET_ATTR_LINENO])
123 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
124
123 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || 125 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
124 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 126 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
125 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 127 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
126 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
127 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
128 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
129 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
130 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
131 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
132 return -IPSET_ERR_PROTOCOL; 128 return -IPSET_ERR_PROTOCOL;
133 129
134 if (tb[IPSET_ATTR_LINENO]) 130 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
135 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 131 if (ret)
132 return ret;
136 133
137 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || 134 ret = ip_set_get_extensions(set, tb, &ext);
138 ip_set_get_extensions(set, tb, &ext);
139 if (ret) 135 if (ret)
140 return ret; 136 return ret;
141 137
@@ -143,10 +139,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
143 if (ret) 139 if (ret)
144 return ret; 140 return ret;
145 141
146 if (tb[IPSET_ATTR_PORT]) 142 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
147 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
148 else
149 return -IPSET_ERR_PROTOCOL;
150 143
151 if (tb[IPSET_ATTR_PROTO]) { 144 if (tb[IPSET_ATTR_PROTO]) {
152 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 145 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -154,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
154 147
155 if (e.proto == 0) 148 if (e.proto == 0)
156 return -IPSET_ERR_INVALID_PROTO; 149 return -IPSET_ERR_INVALID_PROTO;
157 } else 150 } else {
158 return -IPSET_ERR_MISSING_PROTO; 151 return -IPSET_ERR_MISSING_PROTO;
152 }
159 153
160 if (!(with_ports || e.proto == IPPROTO_ICMP)) 154 if (!(with_ports || e.proto == IPPROTO_ICMP))
161 e.port = 0; 155 e.port = 0;
@@ -177,7 +171,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
177 } else if (tb[IPSET_ATTR_CIDR]) { 171 } else if (tb[IPSET_ATTR_CIDR]) {
178 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 172 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
179 173
180 if (!cidr || cidr > 32) 174 if (!cidr || cidr > HOST_MASK)
181 return -IPSET_ERR_INVALID_CIDR; 175 return -IPSET_ERR_INVALID_CIDR;
182 ip_set_mask_from_to(ip, ip_to, cidr); 176 ip_set_mask_from_to(ip, ip_to, cidr);
183 } 177 }
@@ -201,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
201 195
202 if (ret && !ip_set_eexist(ret, flags)) 196 if (ret && !ip_set_eexist(ret, flags))
203 return ret; 197 return ret;
204 else 198
205 ret = 0; 199 ret = 0;
206 } 200 }
207 } 201 }
208 return ret; 202 return ret;
@@ -240,10 +234,10 @@ hash_ipportip6_data_list(struct sk_buff *skb,
240 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || 234 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
241 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) 235 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
242 goto nla_put_failure; 236 goto nla_put_failure;
243 return 0; 237 return false;
244 238
245nla_put_failure: 239nla_put_failure:
246 return 1; 240 return true;
247} 241}
248 242
249static inline void 243static inline void
@@ -254,11 +248,9 @@ hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
254} 248}
255 249
256#undef MTYPE 250#undef MTYPE
257#undef PF
258#undef HOST_MASK 251#undef HOST_MASK
259 252
260#define MTYPE hash_ipportip6 253#define MTYPE hash_ipportip6
261#define PF 6
262#define HOST_MASK 128 254#define HOST_MASK 128
263#define IP_SET_EMIT_CREATE 255#define IP_SET_EMIT_CREATE
264#include "ip_set_hash_gen.h" 256#include "ip_set_hash_gen.h"
@@ -293,24 +285,27 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
293 bool with_ports = false; 285 bool with_ports = false;
294 int ret; 286 int ret;
295 287
288 if (tb[IPSET_ATTR_LINENO])
289 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
290
296 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || 291 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
297 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 292 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
298 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 293 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
299 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
300 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
301 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
302 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
303 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
304 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
305 tb[IPSET_ATTR_IP_TO] ||
306 tb[IPSET_ATTR_CIDR]))
307 return -IPSET_ERR_PROTOCOL; 294 return -IPSET_ERR_PROTOCOL;
295 if (unlikely(tb[IPSET_ATTR_IP_TO]))
296 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
297 if (unlikely(tb[IPSET_ATTR_CIDR])) {
298 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
308 299
309 if (tb[IPSET_ATTR_LINENO]) 300 if (cidr != HOST_MASK)
310 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 301 return -IPSET_ERR_INVALID_CIDR;
302 }
311 303
312 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || 304 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
313 ip_set_get_extensions(set, tb, &ext); 305 if (ret)
306 return ret;
307
308 ret = ip_set_get_extensions(set, tb, &ext);
314 if (ret) 309 if (ret)
315 return ret; 310 return ret;
316 311
@@ -318,10 +313,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
318 if (ret) 313 if (ret)
319 return ret; 314 return ret;
320 315
321 if (tb[IPSET_ATTR_PORT]) 316 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
322 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
323 else
324 return -IPSET_ERR_PROTOCOL;
325 317
326 if (tb[IPSET_ATTR_PROTO]) { 318 if (tb[IPSET_ATTR_PROTO]) {
327 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 319 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -329,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
329 321
330 if (e.proto == 0) 322 if (e.proto == 0)
331 return -IPSET_ERR_INVALID_PROTO; 323 return -IPSET_ERR_INVALID_PROTO;
332 } else 324 } else {
333 return -IPSET_ERR_MISSING_PROTO; 325 return -IPSET_ERR_MISSING_PROTO;
326 }
334 327
335 if (!(with_ports || e.proto == IPPROTO_ICMPV6)) 328 if (!(with_ports || e.proto == IPPROTO_ICMPV6))
336 e.port = 0; 329 e.port = 0;
@@ -353,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
353 346
354 if (ret && !ip_set_eexist(ret, flags)) 347 if (ret && !ip_set_eexist(ret, flags))
355 return ret; 348 return ret;
356 else 349
357 ret = 0; 350 ret = 0;
358 } 351 }
359 return ret; 352 return ret;
360} 353}
@@ -388,7 +381,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
388 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 381 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
389 [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, 382 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
390 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, 383 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
391 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, 384 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
385 .len = IPSET_MAX_COMMENT_SIZE },
392 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, 386 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
393 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, 387 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
394 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, 388 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -405,6 +399,7 @@ hash_ipportip_init(void)
405static void __exit 399static void __exit
406hash_ipportip_fini(void) 400hash_ipportip_fini(void)
407{ 401{
402 rcu_barrier();
408 ip_set_type_unregister(&hash_ipportip_type); 403 ip_set_type_unregister(&hash_ipportip_type);
409} 404}
410 405
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index b6012ad92781..9ca719625ea3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -114,10 +114,10 @@ hash_ipportnet4_data_list(struct sk_buff *skb,
114 (flags && 114 (flags &&
115 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) 115 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
116 goto nla_put_failure; 116 goto nla_put_failure;
117 return 0; 117 return false;
118 118
119nla_put_failure: 119nla_put_failure:
120 return 1; 120 return true;
121} 121}
122 122
123static inline void 123static inline void
@@ -130,7 +130,6 @@ hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
130} 130}
131 131
132#define MTYPE hash_ipportnet4 132#define MTYPE hash_ipportnet4
133#define PF 4
134#define HOST_MASK 32 133#define HOST_MASK 32
135#include "ip_set_hash_gen.h" 134#include "ip_set_hash_gen.h"
136 135
@@ -142,7 +141,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
142 const struct hash_ipportnet *h = set->data; 141 const struct hash_ipportnet *h = set->data;
143 ipset_adtfn adtfn = set->variant->adt[adt]; 142 ipset_adtfn adtfn = set->variant->adt[adt];
144 struct hash_ipportnet4_elem e = { 143 struct hash_ipportnet4_elem e = {
145 .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, 144 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
146 }; 145 };
147 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 146 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
148 147
@@ -174,23 +173,20 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
174 u8 cidr; 173 u8 cidr;
175 int ret; 174 int ret;
176 175
176 if (tb[IPSET_ATTR_LINENO])
177 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
178
177 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || 179 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
178 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 180 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
179 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 181 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
180 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 182 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
181 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
182 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
183 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
184 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
185 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
186 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
187 return -IPSET_ERR_PROTOCOL; 183 return -IPSET_ERR_PROTOCOL;
188 184
189 if (tb[IPSET_ATTR_LINENO]) 185 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
190 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 186 if (ret)
187 return ret;
191 188
192 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || 189 ret = ip_set_get_extensions(set, tb, &ext);
193 ip_set_get_extensions(set, tb, &ext);
194 if (ret) 190 if (ret)
195 return ret; 191 return ret;
196 192
@@ -205,10 +201,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
205 e.cidr = cidr - 1; 201 e.cidr = cidr - 1;
206 } 202 }
207 203
208 if (tb[IPSET_ATTR_PORT]) 204 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
209 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
210 else
211 return -IPSET_ERR_PROTOCOL;
212 205
213 if (tb[IPSET_ATTR_PROTO]) { 206 if (tb[IPSET_ATTR_PROTO]) {
214 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 207 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -216,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -249,7 +244,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else if (tb[IPSET_ATTR_CIDR]) {
 		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
-		if (!cidr || cidr > 32)
+		if (!cidr || cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(ip, ip_to, cidr);
 	}
@@ -270,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip2_from, ip2_to);
 		if (ip2_from + UINT_MAX == ip2_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip);
@@ -294,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 			ip2 = ip2_last + 1;
 		}
 	}
@@ -367,10 +363,10 @@ hash_ipportnet6_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -381,11 +377,9 @@ hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
 }
 
 #undef MTYPE
-#undef PF
 #undef HOST_MASK
 
 #define MTYPE hash_ipportnet6
-#define PF 6
 #define HOST_MASK 128
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
@@ -398,7 +392,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
@@ -429,27 +423,28 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	u8 cidr;
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
-		     tb[IPSET_ATTR_IP_TO] ||
-		     tb[IPSET_ATTR_CIDR]))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+	if (unlikely(tb[IPSET_ATTR_CIDR])) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+		if (cidr != HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -466,10 +461,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	ip6_netmask(&e.ip2, e.cidr + 1);
 
-	if (tb[IPSET_ATTR_PORT])
-		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
-	else
-		return -IPSET_ERR_PROTOCOL;
+	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -477,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -508,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -547,7 +541,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -564,6 +559,7 @@ hash_ipportnet_init(void)
 static void __exit
 hash_ipportnet_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_ipportnet_type);
 }
 
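Note on the *_data_list() hunks above: the helpers are declared bool, so the 0/1 returns become false/true. A minimal sketch of the convention, illustrative only (example_data_list() is a made-up name; nla_put_ipaddr4() and nla_put_net32() are the real ipset/netlink put helpers):

static bool
example_data_list(struct sk_buff *skb, __be32 ip, u32 flags)
{
	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, ip) ||
	    (flags &&
	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
		goto nla_put_failure;
	return false;	/* all attributes fitted into the skb */

nla_put_failure:
	return true;	/* dump code cancels the half-filled element */
}
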
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 65690b52a4d5..f1e7d2c0f685 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -52,7 +52,12 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
 static inline bool
 hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
 {
-	return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+	if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+		goto nla_put_failure;
+	return false;
+
+nla_put_failure:
+	return true;
 }
 
 static inline void
@@ -62,7 +67,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next,
 }
 
 #define MTYPE hash_mac4
-#define PF 4
 #define HOST_MASK 32
 #define IP_SET_EMIT_CREATE
 #define IP_SET_PROTO_UNDEF
@@ -85,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		return 0;
 
 	if (skb_mac_header(skb) < skb->head ||
 	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
 		return -EINVAL;
 
-	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+	ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
 	if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
 		return -EINVAL;
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
@@ -103,22 +107,16 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
-	if (unlikely(!tb[IPSET_ATTR_ETHER] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
-		return -IPSET_ERR_PROTOCOL;
-
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	if (unlikely(!tb[IPSET_ATTR_ETHER]))
+		return -IPSET_ERR_PROTOCOL;
+
 	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
-	memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+	ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
 	if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
 		return -IPSET_ERR_HASH_ELEM;
 
@@ -149,7 +147,8 @@ static struct ip_set_type hash_mac_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -166,6 +165,7 @@ hash_mac_init(void)
 static void __exit
 hash_mac_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_mac_type);
 }
 
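Both memcpy() calls on MAC addresses above become ether_addr_copy(). A sketch of the idiom (the example_* names are hypothetical; ether_addr_copy() is the real helper from <linux/etherdevice.h> and may copy the six bytes as wider loads, so both buffers must be 2-byte aligned):

#include <linux/etherdevice.h>	/* ether_addr_copy(), ETH_ALEN, eth_hdr() */

struct example_mac_elem {
	unsigned char ether[ETH_ALEN];	/* 2-byte aligned element key */
};

static void example_copy_src_mac(struct example_mac_elem *e,
				 const struct sk_buff *skb)
{
	/* equivalent to memcpy(e->ether, ..., ETH_ALEN), but may use
	 * u16/u32 loads, hence the alignment requirement */
	ether_addr_copy(e->ether, eth_hdr(skb)->h_source);
}
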
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 6b3ac10ac2f1..3e4bffdc1cc0 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -95,10 +95,10 @@ hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -109,7 +109,6 @@ hash_net4_data_next(struct hash_net4_elem *next,
 }
 
 #define MTYPE hash_net4
-#define PF 4
 #define HOST_MASK 32
 #include "ip_set_hash_gen.h"
 
@@ -121,7 +120,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
@@ -147,21 +146,18 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	u32 ip = 0, ip_to = 0, last;
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -173,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -180,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
 		e.ip = htonl(ip & ip_set_hostmask(e.cidr));
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt, set) ? -ret:
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -202,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 		ret = adtfn(set, &e, &ext, &ext, flags);
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 		ip = last + 1;
 	}
 	return ret;
@@ -264,10 +261,10 @@ hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -277,11 +274,9 @@ hash_net6_data_next(struct hash_net4_elem *next,
 }
 
 #undef MTYPE
-#undef PF
 #undef HOST_MASK
 
 #define MTYPE hash_net6
-#define PF 6
 #define HOST_MASK 128
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
@@ -294,7 +289,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
@@ -318,36 +313,34 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
-	if (tb[IPSET_ATTR_CIDR])
+	if (tb[IPSET_ATTR_CIDR]) {
 		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-
-	if (!e.cidr || e.cidr > HOST_MASK)
-		return -IPSET_ERR_INVALID_CIDR;
+		if (!e.cidr || e.cidr > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
 	ip6_netmask(&e.ip, e.cidr);
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -383,7 +376,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -400,6 +394,7 @@ hash_net_init(void)
 static void __exit
 hash_net_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_net_type);
 }
 
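A pattern repeated in every *_uadt() hunk is splitting ret = f() || g(); into separately checked calls. With ret declared int, the C || operator yields only 0 or 1, so the chained form flattened a helper's specific negative error code to 1 before it was returned. A sketch with hypothetical get_addr()/get_ext() helpers (not kernel functions):

extern int get_addr(struct nlattr *attr, u32 *ip);	/* may fail with -EINVAL */
extern int get_ext(struct nlattr *attr, u32 *ext);

static int parse_chained(struct nlattr *attr, u32 *ip, u32 *ext)
{
	int ret = get_addr(attr, ip) ||		/* -EINVAL collapses to 1 */
		  get_ext(attr, ext);

	return ret;	/* caller sees 1, not the real error code */
}

static int parse_split(struct nlattr *attr, u32 *ip, u32 *ext)
{
	int ret = get_addr(attr, ip);

	if (ret)
		return ret;	/* first failure, code preserved */
	return get_ext(attr, ext);
}
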
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 380ef5148ea1..43d8c9896fa3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -13,7 +13,6 @@
 #include <linux/skbuff.h>
 #include <linux/errno.h>
 #include <linux/random.h>
-#include <linux/rbtree.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/netlink.h>
@@ -37,88 +36,13 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,iface");
 
-/* Interface name rbtree */
-
-struct iface_node {
-	struct rb_node node;
-	char iface[IFNAMSIZ];
-};
-
-#define iface_data(n)	(rb_entry(n, struct iface_node, node)->iface)
-
-static void
-rbtree_destroy(struct rb_root *root)
-{
-	struct iface_node *node, *next;
-
-	rbtree_postorder_for_each_entry_safe(node, next, root, node)
-		kfree(node);
-
-	*root = RB_ROOT;
-}
-
-static int
-iface_test(struct rb_root *root, const char **iface)
-{
-	struct rb_node *n = root->rb_node;
-
-	while (n) {
-		const char *d = iface_data(n);
-		int res = strcmp(*iface, d);
-
-		if (res < 0)
-			n = n->rb_left;
-		else if (res > 0)
-			n = n->rb_right;
-		else {
-			*iface = d;
-			return 1;
-		}
-	}
-	return 0;
-}
-
-static int
-iface_add(struct rb_root *root, const char **iface)
-{
-	struct rb_node **n = &(root->rb_node), *p = NULL;
-	struct iface_node *d;
-
-	while (*n) {
-		char *ifname = iface_data(*n);
-		int res = strcmp(*iface, ifname);
-
-		p = *n;
-		if (res < 0)
-			n = &((*n)->rb_left);
-		else if (res > 0)
-			n = &((*n)->rb_right);
-		else {
-			*iface = ifname;
-			return 0;
-		}
-	}
-
-	d = kzalloc(sizeof(*d), GFP_ATOMIC);
-	if (!d)
-		return -ENOMEM;
-	strcpy(d->iface, *iface);
-
-	rb_link_node(&d->node, p, n);
-	rb_insert_color(&d->node, root);
-
-	*iface = d->iface;
-	return 0;
-}
-
 /* Type specific function prefix */
 #define HTYPE hash_netiface
 #define IP_SET_HASH_WITH_NETS
-#define IP_SET_HASH_WITH_RBTREE
 #define IP_SET_HASH_WITH_MULTI
 #define IP_SET_HASH_WITH_NET0
 
-#define STREQ(a, b)	(strcmp(a, b) == 0)
+#define STRLCPY(a, b)	strlcpy(a, b, IFNAMSIZ)
 
 /* IPv4 variant */
 
@@ -137,7 +61,7 @@ struct hash_netiface4_elem {
 	u8 cidr;
 	u8 nomatch;
 	u8 elem;
-	const char *iface;
+	char iface[IFNAMSIZ];
 };
 
 /* Common functions */
@@ -151,7 +75,7 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
 	       ip1->cidr == ip2->cidr &&
 	       (++*multi) &&
 	       ip1->physdev == ip2->physdev &&
-	       ip1->iface == ip2->iface;
+	       strcmp(ip1->iface, ip2->iface) == 0;
 }
 
 static inline int
@@ -193,10 +117,10 @@ hash_netiface4_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -207,7 +131,6 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
 }
 
 #define MTYPE hash_netiface4
-#define PF 4
 #define HOST_MASK 32
 #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
 #include "ip_set_hash_gen.h"
@@ -220,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb)
 	return dev ? dev->name : NULL;
 }
 
-static const char *get_phyoutdev_name(const struct sk_buff *skb)
+static const char *get_physoutdev_name(const struct sk_buff *skb)
 {
 	struct net_device *dev = nf_bridge_get_physoutdev(skb);
 
@@ -236,11 +159,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
-	int ret;
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -250,35 +172,25 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
 	e.ip &= ip_set_netmask(e.cidr);
 
-#define IFACE(dir)	(par->dir ? par->dir->name : NULL)
+#define IFACE(dir)	(par->dir ? par->dir->name : "")
 #define SRCDIR		(opt->flags & IPSET_DIM_TWO_SRC)
 
 	if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-		e.iface = SRCDIR ? get_physindev_name(skb) :
-				   get_phyoutdev_name(skb);
+		const char *eiface = SRCDIR ? get_physindev_name(skb) :
+					      get_physoutdev_name(skb);
 
-		if (!e.iface)
+		if (!eiface)
 			return -EINVAL;
+		STRLCPY(e.iface, eiface);
 		e.physdev = 1;
-#else
-		e.iface = NULL;
 #endif
-	} else
-		e.iface = SRCDIR ? IFACE(in) : IFACE(out);
+	} else {
+		STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+	}
 
-	if (!e.iface)
+	if (strlen(e.iface) == 0)
 		return -EINVAL;
-	ret = iface_test(&h->rbtree, &e.iface);
-	if (adt == IPSET_ADD) {
-		if (!ret) {
-			ret = iface_add(&h->rbtree, &e.iface);
-			if (ret)
-				return ret;
-		}
-	} else if (!ret)
-		return ret;
-
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
@@ -291,25 +203,21 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
-	char iface[IFNAMSIZ];
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
 		     !tb[IPSET_ATTR_IFACE] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -318,21 +226,11 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (e.cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 	}
-
-	strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
-	e.iface = iface;
-	ret = iface_test(&h->rbtree, &e.iface);
-	if (adt == IPSET_ADD) {
-		if (!ret) {
-			ret = iface_add(&h->rbtree, &e.iface);
-			if (ret)
-				return ret;
-		}
-	} else if (!ret)
-		return ret;
+	nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_PHYSDEV)
 			e.physdev = 1;
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -353,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip);
@@ -365,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 		ip = last + 1;
 	}
 	return ret;
@@ -388,7 +287,7 @@ struct hash_netiface6_elem {
 	u8 cidr;
 	u8 nomatch;
 	u8 elem;
-	const char *iface;
+	char iface[IFNAMSIZ];
 };
 
 /* Common functions */
@@ -402,7 +301,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
 	       ip1->cidr == ip2->cidr &&
 	       (++*multi) &&
 	       ip1->physdev == ip2->physdev &&
-	       ip1->iface == ip2->iface;
+	       strcmp(ip1->iface, ip2->iface) == 0;
 }
 
 static inline int
@@ -444,10 +343,10 @@ hash_netiface6_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -457,12 +356,9 @@ hash_netiface6_data_next(struct hash_netiface4_elem *next,
 }
 
 #undef MTYPE
-#undef PF
 #undef HOST_MASK
-#undef HKEY_DATALEN
 
 #define MTYPE hash_netiface6
-#define PF 6
 #define HOST_MASK 128
 #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
 #define IP_SET_EMIT_CREATE
@@ -476,11 +372,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
-	int ret;
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -492,85 +387,64 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 	if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-		e.iface = SRCDIR ? get_physindev_name(skb) :
-				   get_phyoutdev_name(skb);
-		if (!e.iface)
-			return -EINVAL;
+		const char *eiface = SRCDIR ? get_physindev_name(skb) :
+					      get_physoutdev_name(skb);
 
+		if (!eiface)
+			return -EINVAL;
+		STRLCPY(e.iface, eiface);
 		e.physdev = 1;
-#else
-		e.iface = NULL;
 #endif
-	} else
-		e.iface = SRCDIR ? IFACE(in) : IFACE(out);
+	} else {
+		STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+	}
 
-	if (!e.iface)
+	if (strlen(e.iface) == 0)
 		return -EINVAL;
-	ret = iface_test(&h->rbtree, &e.iface);
-	if (adt == IPSET_ADD) {
-		if (!ret) {
-			ret = iface_add(&h->rbtree, &e.iface);
-			if (ret)
-				return ret;
-		}
-	} else if (!ret)
-		return ret;
 
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	char iface[IFNAMSIZ];
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
 		     !tb[IPSET_ATTR_IFACE] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
-	if (tb[IPSET_ATTR_CIDR])
+	if (tb[IPSET_ATTR_CIDR]) {
 		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 		if (e.cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
+	}
+
 	ip6_netmask(&e.ip, e.cidr);
 
-	strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
-	e.iface = iface;
-	ret = iface_test(&h->rbtree, &e.iface);
-	if (adt == IPSET_ADD) {
-		if (!ret) {
-			ret = iface_add(&h->rbtree, &e.iface);
-			if (ret)
-				return ret;
-		}
-	} else if (!ret)
-		return ret;
+	nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_PHYSDEV)
 			e.physdev = 1;
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -613,7 +487,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -630,6 +505,7 @@ hash_netiface_init(void)
 static void __exit
 hash_netiface_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_netiface_type);
 }
 
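The hash:net,iface rework above drops the shared rbtree that interned interface-name strings and instead embeds a fixed char iface[IFNAMSIZ] in each element, copied with strlcpy()/nla_strlcpy() and compared with strcmp() rather than by pointer. A reduced sketch of the new storage shape (the example_* names are made up; IFNAMSIZ, strlcpy() and nla_strlcpy() are the real kernel interfaces):

#include <linux/if.h>		/* IFNAMSIZ */
#include <linux/string.h>

struct example_iface_elem {
	__be32 ip;
	u8 cidr;
	char iface[IFNAMSIZ];	/* embedded copy, no shared allocation */
};

static bool example_iface_equal(const struct example_iface_elem *e1,
				const struct example_iface_elem *e2)
{
	/* pointer identity (e1->iface == e2->iface) no longer holds;
	 * compare the bytes instead */
	return strcmp(e1->iface, e2->iface) == 0;
}

static void example_set_iface(struct example_iface_elem *e, const char *name)
{
	strlcpy(e->iface, name, IFNAMSIZ);	/* truncates, NUL-terminates */
}
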
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index ea8772afb6e7..3c862c0a76d1 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -57,8 +57,8 @@ struct hash_netnet4_elem {
 
 static inline bool
 hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
-		     const struct hash_netnet4_elem *ip2,
-		     u32 *multi)
+			const struct hash_netnet4_elem *ip2,
+			u32 *multi)
 {
 	return ip1->ipcmp == ip2->ipcmp &&
 	       ip1->ccmp == ip2->ccmp;
@@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
 
 static inline void
 hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
-			  struct hash_netnet4_elem *orig)
+			     struct hash_netnet4_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
 
 static bool
 hash_netnet4_data_list(struct sk_buff *skb,
-		  const struct hash_netnet4_elem *data)
+		       const struct hash_netnet4_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -122,28 +122,27 @@ nla_put_failure:
 
 static inline void
 hash_netnet4_data_next(struct hash_netnet4_elem *next,
-		    const struct hash_netnet4_elem *d)
+		       const struct hash_netnet4_elem *d)
 {
 	next->ipcmp = d->ipcmp;
 }
 
 #define MTYPE hash_netnet4
-#define PF 4
 #define HOST_MASK 32
 #include "ip_set_hash_gen.h"
 
 static int
 hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
-	       const struct xt_action_param *par,
-	       enum ipset_adt adt, struct ip_set_adt_opt *opt)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netnet4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
-	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
 	if (adt == IPSET_TEST)
 		e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
 
@@ -157,53 +156,50 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
-	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netnet4_elem e = { };
+	struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
 	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
-	u8 cidr, cidr2;
 	int ret;
 
-	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
 	if (tb[IPSET_ATTR_CIDR]) {
-		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-		if (!cidr || cidr > HOST_MASK)
+		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		e.cidr[0] = cidr;
 	}
 
 	if (tb[IPSET_ATTR_CIDR2]) {
-		cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-		if (!cidr2 || cidr2 > HOST_MASK)
+		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+		if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		e.cidr[1] = cidr2;
 	}
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -226,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (unlikely(ip + UINT_MAX == ip_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+	}
 
 	ip2_to = ip2_from;
 	if (tb[IPSET_ATTR_IP2_TO]) {
@@ -238,28 +235,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip2_from, ip2_to);
 		if (unlikely(ip2_from + UINT_MAX == ip2_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip[0]);
 
 	while (!after(ip, ip_to)) {
 		e.ip[0] = htonl(ip);
-		last = ip_set_range_to_cidr(ip, ip_to, &cidr);
-		e.cidr[0] = cidr;
+		last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
 		ip2 = (retried &&
 		       ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
 						   : ip2_from;
 		while (!after(ip2, ip2_to)) {
 			e.ip[1] = htonl(ip2);
-			last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2);
-			e.cidr[1] = cidr2;
+			last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
 			ret = adtfn(set, &e, &ext, &ext, flags);
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 			ip2 = last2 + 1;
 		}
 		ip = last + 1;
@@ -283,8 +279,8 @@ struct hash_netnet6_elem {
 
 static inline bool
 hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
-		     const struct hash_netnet6_elem *ip2,
-		     u32 *multi)
+			const struct hash_netnet6_elem *ip2,
+			u32 *multi)
 {
 	return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
 	       ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -311,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
 
 static inline void
 hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
-			  struct hash_netnet6_elem *orig)
+			     struct hash_netnet6_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -330,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
 
 static bool
 hash_netnet6_data_list(struct sk_buff *skb,
-		  const struct hash_netnet6_elem *data)
+		       const struct hash_netnet6_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -349,34 +345,32 @@ nla_put_failure:
 
 static inline void
 hash_netnet6_data_next(struct hash_netnet4_elem *next,
-		    const struct hash_netnet6_elem *d)
+		       const struct hash_netnet6_elem *d)
 {
 }
 
 #undef MTYPE
-#undef PF
 #undef HOST_MASK
 
 #define MTYPE hash_netnet6
-#define PF 6
 #define HOST_MASK 128
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
 
 static int
 hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
-	       const struct xt_action_param *par,
-	       enum ipset_adt adt, struct ip_set_adt_opt *opt)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netnet6_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
-	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
 	if (adt == IPSET_TEST)
-		e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
+		e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
 	ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
@@ -388,50 +382,52 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
-	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netnet6_elem e = { };
+	struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
-	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
-	      ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
 	if (ret)
 		return ret;
 
-	if (tb[IPSET_ATTR_CIDR])
+	ret = ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR]) {
 		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
-	if (tb[IPSET_ATTR_CIDR2])
+	if (tb[IPSET_ATTR_CIDR2]) {
 		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
-	if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
-	    e.cidr[1] > HOST_MASK)
-		return -IPSET_ERR_INVALID_CIDR;
+		if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
 	ip6_netmask(&e.ip[0], e.cidr[0]);
 	ip6_netmask(&e.ip[1], e.cidr[1]);
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -470,7 +466,8 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -487,6 +484,7 @@ hash_netnet_init(void)
 static void __exit
 hash_netnet_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_netnet_type);
 }
 
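Each module exit routine in this series gains an rcu_barrier() before ip_set_type_unregister(). rcu_barrier() waits for all already-queued call_rcu()/kfree_rcu() callbacks to complete, so none of them can run after the type, and potentially the module text, is gone. A sketch of the ordering (example_type and example_fini() are hypothetical stand-ins for the hash_*_type variants):

static struct ip_set_type example_type;		/* stand-in for hash_*_type */

static void __exit example_fini(void)
{
	rcu_barrier();	/* flush already-queued call_rcu()/kfree_rcu() work */
	ip_set_type_unregister(&example_type);	/* then tear the type down */
}
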
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index c0ddb58d19dc..731813e0f08c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -110,10 +110,10 @@ hash_netport4_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -125,7 +125,6 @@ hash_netport4_data_next(struct hash_netport4_elem *next,
 }
 
 #define MTYPE		hash_netport4
-#define PF		4
 #define HOST_MASK	32
 #include "ip_set_hash_gen.h"
 
@@ -137,7 +136,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
@@ -167,23 +166,20 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	u8 cidr;
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -194,10 +190,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		e.cidr = cidr - 1;
 	}
 
-	if (tb[IPSET_ATTR_PORT])
-		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
-	else
-		return -IPSET_ERR_PROTOCOL;
+	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -205,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
@@ -215,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -240,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip);
@@ -257,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 		}
 		ip = last + 1;
 	}
@@ -326,10 +322,10 @@ hash_netport6_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
@@ -340,11 +336,9 @@ hash_netport6_data_next(struct hash_netport4_elem *next,
 }
 
 #undef MTYPE
-#undef PF
 #undef HOST_MASK
 
 #define MTYPE		hash_netport6
-#define PF		6
 #define HOST_MASK	128
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
@@ -357,7 +351,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = {
-		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
@@ -387,25 +381,22 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	u8 cidr;
 	int ret;
 
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -417,10 +408,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 	ip6_netmask(&e.ip, e.cidr + 1);
 
-	if (tb[IPSET_ATTR_PORT])
-		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
-	else
-		return -IPSET_ERR_PROTOCOL;
+	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -428,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -459,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -495,7 +485,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -512,6 +503,7 @@ hash_netport_init(void)
 static void __exit
 hash_netport_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_netport_type);
 }
 
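The uadt loops above walk an IP interval by repeatedly carving off the largest CIDR-aligned block (via ip_set_range_to_cidr()) and resuming at last + 1; that is why the patched loop can now write the prefix length straight into e.cidr. A self-contained sketch of that walk; range_to_cidr below is an illustrative model, not the kernel helper:

#include <stdint.h>
#include <stdio.h>

/* Largest block that starts aligned at 'from' and does not pass 'to';
 * returns the block's last address and stores its prefix length. */
static uint32_t range_to_cidr(uint32_t from, uint32_t to, uint8_t *prefix)
{
	uint8_t p = 32;
	uint64_t size = 1;

	while (p > 0) {
		uint64_t next = size << 1;

		if ((from & (next - 1)) || (uint64_t)from + next - 1 > to)
			break;
		size = next;
		p--;
	}
	*prefix = p;
	return (uint32_t)(from + size - 1);
}

int main(void)
{
	uint32_t ip = 0x0a000001, ip_to = 0x0a000016;	/* 10.0.0.1 - 10.0.0.22 */
	uint8_t cidr;

	while (ip <= ip_to) {
		uint32_t last = range_to_cidr(ip, ip_to, &cidr);

		printf("%u.%u.%u.%u/%u\n", ip >> 24, (ip >> 16) & 0xff,
		       (ip >> 8) & 0xff, ip & 0xff, cidr);
		if (last == UINT32_MAX)
			break;
		ip = last + 1;
	}
	return 0;
}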
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index bfaa94c7baa7..0c68734f5cc4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -54,7 +54,7 @@ struct hash_netportnet4_elem {
 		u16 ccmp;
 	};
 	u16 padding;
-	u8 nomatch:1;
+	u8 nomatch;
 	u8 proto;
 };
 
@@ -62,8 +62,8 @@ struct hash_netportnet4_elem {
 
 static inline bool
 hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
-			   const struct hash_netportnet4_elem *ip2,
-			   u32 *multi)
+			    const struct hash_netportnet4_elem *ip2,
+			    u32 *multi)
 {
 	return ip1->ipcmp == ip2->ipcmp &&
 	       ip1->ccmp == ip2->ccmp &&
@@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
 
 static inline void
 hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
-				struct hash_netportnet4_elem *orig)
+				 struct hash_netportnet4_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
 
 static bool
 hash_netportnet4_data_list(struct sk_buff *skb,
-			  const struct hash_netportnet4_elem *data)
+			   const struct hash_netportnet4_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -124,37 +124,36 @@ hash_netportnet4_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
 hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
 			   const struct hash_netportnet4_elem *d)
 {
 	next->ipcmp = d->ipcmp;
 	next->port = d->port;
 }
 
 #define MTYPE		hash_netportnet4
-#define PF		4
 #define HOST_MASK	32
 #include "ip_set_hash_gen.h"
 
 static int
 hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		      const struct xt_action_param *par,
 		      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netportnet4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
-	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
 	if (adt == IPSET_TEST)
 		e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
 
@@ -172,58 +171,51 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netportnet4_elem e = { };
+	struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
 	u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
 	bool with_ports = false;
-	u8 cidr, cidr2;
 	int ret;
 
-	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
-	      ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
 	if (tb[IPSET_ATTR_CIDR]) {
-		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-		if (!cidr || cidr > HOST_MASK)
+		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		e.cidr[0] = cidr;
 	}
 
 	if (tb[IPSET_ATTR_CIDR2]) {
-		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-		if (!cidr || cidr > HOST_MASK)
+		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+		if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		e.cidr[1] = cidr;
 	}
 
-	if (tb[IPSET_ATTR_PORT])
-		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
-	else
-		return -IPSET_ERR_PROTOCOL;
+	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -231,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -262,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (unlikely(ip + UINT_MAX == ip_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+	}
 
 	port_to = port = ntohs(e.port);
 	if (tb[IPSET_ATTR_PORT_TO]) {
@@ -281,16 +276,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip2_from, ip2_to);
 		if (unlikely(ip2_from + UINT_MAX == ip2_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip[0]);
 
 	while (!after(ip, ip_to)) {
 		e.ip[0] = htonl(ip);
-		ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr);
-		e.cidr[0] = cidr;
+		ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
 		p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
 							  : port;
 		for (; p <= port_to; p++) {
@@ -301,13 +296,12 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			while (!after(ip2, ip2_to)) {
 				e.ip[1] = htonl(ip2);
 				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
-								&cidr2);
-				e.cidr[1] = cidr2;
+								&e.cidr[1]);
 				ret = adtfn(set, &e, &ext, &ext, flags);
 				if (ret && !ip_set_eexist(ret, flags))
 					return ret;
-				else
-					ret = 0;
+
+				ret = 0;
 				ip2 = ip2_last + 1;
 			}
 		}
@@ -326,7 +320,7 @@ struct hash_netportnet6_elem {
 		u16 ccmp;
 	};
 	u16 padding;
-	u8 nomatch:1;
+	u8 nomatch;
 	u8 proto;
 };
 
@@ -334,8 +328,8 @@ struct hash_netportnet6_elem {
 
 static inline bool
 hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
-			   const struct hash_netportnet6_elem *ip2,
-			   u32 *multi)
+			    const struct hash_netportnet6_elem *ip2,
+			    u32 *multi)
 {
 	return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
 	       ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -364,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
 
 static inline void
 hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
-				struct hash_netportnet6_elem *orig)
+				 struct hash_netportnet6_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -384,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
 
 static bool
 hash_netportnet6_data_list(struct sk_buff *skb,
-			  const struct hash_netportnet6_elem *data)
+			   const struct hash_netportnet6_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -397,41 +391,39 @@ hash_netportnet6_data_list(struct sk_buff *skb,
 	    (flags &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
 		goto nla_put_failure;
-	return 0;
+	return false;
 
 nla_put_failure:
-	return 1;
+	return true;
 }
 
 static inline void
 hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
 			   const struct hash_netportnet6_elem *d)
 {
 	next->port = d->port;
 }
 
 #undef MTYPE
-#undef PF
 #undef HOST_MASK
 
 #define MTYPE		hash_netportnet6
-#define PF		6
 #define HOST_MASK	128
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
 
 static int
 hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		      const struct xt_action_param *par,
 		      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netportnet6_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
-	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
 	if (adt == IPSET_TEST)
 		e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
 
@@ -449,57 +441,55 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 		      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netportnet6_elem e = { };
+	struct hash_netportnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
 
-	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
 
-	if (tb[IPSET_ATTR_LINENO])
-		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+	if (ret)
+		return ret;
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
-	      ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
-	      ip_set_get_extensions(set, tb, &ext);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
 	if (ret)
 		return ret;
 
-	if (tb[IPSET_ATTR_CIDR])
+	ret = ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR]) {
 		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
-	if (tb[IPSET_ATTR_CIDR2])
+	if (tb[IPSET_ATTR_CIDR2]) {
 		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
-	if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
-		     e.cidr[1] > HOST_MASK))
-		return -IPSET_ERR_INVALID_CIDR;
+		if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
 	ip6_netmask(&e.ip[0], e.cidr[0]);
 	ip6_netmask(&e.ip[1], e.cidr[1]);
 
-	if (tb[IPSET_ATTR_PORT])
-		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
-	else
-		return -IPSET_ERR_PROTOCOL;
+	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -507,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -538,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -577,7 +569,8 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -594,6 +587,7 @@ hash_netportnet_init(void)
 static void __exit
 hash_netportnet_fini(void)
 {
+	rcu_barrier();
 	ip_set_type_unregister(&hash_netportnet_type);
 }
 
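hash_netportnet elements keep their two prefix lengths in cidr[0] and cidr[1], overlaid with a single u16 ccmp so both compare in one operation; that is what e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK packs in the kadt functions above. A toy model of the union trick (struct elem is illustrative, not the kernel layout):

#include <stdint.h>
#include <stdio.h>

struct elem {
	union {
		uint8_t cidr[2];	/* two prefix lengths */
		uint16_t ccmp;		/* same bytes viewed as one u16 */
	};
};

int main(void)
{
	struct elem a = { .cidr = { 24, 32 } };
	struct elem b = { .cidr = { 24, 32 } };
	struct elem c = { .cidr = { 16, 32 } };

	/* One 16-bit compare covers both prefix lengths at once. */
	printf("a==b: %d, a==c: %d\n", a.ccmp == b.ccmp, a.ccmp == c.ccmp);
	return 0;
}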
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f8f682806e36..a1fe5377a2b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -9,6 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/ip.h>
+#include <linux/rculist.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
 
@@ -27,6 +28,8 @@ MODULE_ALIAS("ip_set_list:set");
 
 /* Member elements */
 struct set_elem {
+	struct rcu_head rcu;
+	struct list_head list;
 	ip_set_id_t id;
 };
 
@@ -41,12 +44,9 @@ struct list_set {
 	u32 size;		/* size of set list array */
 	struct timer_list gc;	/* garbage collection */
 	struct net *net;	/* namespace */
-	struct set_elem members[0]; /* the set members */
+	struct list_head members; /* the set members */
 };
 
-#define list_set_elem(set, map, id)	\
-	(struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
-
 static int
 list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 	       const struct xt_action_param *par,
@@ -54,17 +54,14 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct list_set *map = set->data;
 	struct set_elem *e;
-	u32 i, cmdflags = opt->cmdflags;
+	u32 cmdflags = opt->cmdflags;
 	int ret;
 
 	/* Don't lookup sub-counters at all */
 	opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
 	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
 		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			return 0;
+	list_for_each_entry_rcu(e, &map->members, list) {
 		if (SET_WITH_TIMEOUT(set) &&
 		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
@@ -91,13 +88,9 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct list_set *map = set->data;
 	struct set_elem *e;
-	u32 i;
 	int ret;
 
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			return 0;
+	list_for_each_entry(e, &map->members, list) {
 		if (SET_WITH_TIMEOUT(set) &&
 		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
@@ -115,13 +108,9 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct list_set *map = set->data;
 	struct set_elem *e;
-	u32 i;
 	int ret;
 
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			return 0;
+	list_for_each_entry(e, &map->members, list) {
 		if (SET_WITH_TIMEOUT(set) &&
 		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
@@ -138,110 +127,65 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
 	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+	int ret = -EINVAL;
 
+	rcu_read_lock();
 	switch (adt) {
 	case IPSET_TEST:
-		return list_set_ktest(set, skb, par, opt, &ext);
+		ret = list_set_ktest(set, skb, par, opt, &ext);
+		break;
 	case IPSET_ADD:
-		return list_set_kadd(set, skb, par, opt, &ext);
+		ret = list_set_kadd(set, skb, par, opt, &ext);
+		break;
 	case IPSET_DEL:
-		return list_set_kdel(set, skb, par, opt, &ext);
+		ret = list_set_kdel(set, skb, par, opt, &ext);
+		break;
 	default:
 		break;
 	}
-	return -EINVAL;
-}
-
-static bool
-id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
-{
-	const struct list_set *map = set->data;
-	const struct set_elem *e;
-
-	if (i >= map->size)
-		return 0;
+	rcu_read_unlock();
 
-	e = list_set_elem(set, map, i);
-	return !!(e->id == id &&
-		  !(SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, set))));
+	return ret;
 }
 
-static int
-list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
-	     const struct ip_set_ext *ext)
-{
-	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(set, map, i);
+/* Userspace interfaces: we are protected by the nfnl mutex */
 
-	if (e->id != IPSET_INVALID_ID) {
-		if (i == map->size - 1) {
-			/* Last element replaced: e.g. add new,before,last */
-			ip_set_put_byindex(map->net, e->id);
-			ip_set_ext_destroy(set, e);
-		} else {
-			struct set_elem *x = list_set_elem(set, map,
-							   map->size - 1);
-
-			/* Last element pushed off */
-			if (x->id != IPSET_INVALID_ID) {
-				ip_set_put_byindex(map->net, x->id);
-				ip_set_ext_destroy(set, x);
-			}
-			memmove(list_set_elem(set, map, i + 1), e,
-				set->dsize * (map->size - (i + 1)));
-			/* Extensions must be initialized to zero */
-			memset(e, 0, set->dsize);
-		}
-	}
-
-	e->id = d->id;
-	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
-	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(e, set), ext);
-	if (SET_WITH_COMMENT(set))
-		ip_set_init_comment(ext_comment(e, set), ext);
-	if (SET_WITH_SKBINFO(set))
-		ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
-	return 0;
-}
-
-static int
-list_set_del(struct ip_set *set, u32 i)
+static void
+__list_set_del(struct ip_set *set, struct set_elem *e)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(set, map, i);
 
 	ip_set_put_byindex(map->net, e->id);
+	/* This call is safe here: no extension that would need
+	 * destroying is in use from the kernel side.
+	 */
 	ip_set_ext_destroy(set, e);
+	kfree_rcu(e, rcu);
+}
 
-	if (i < map->size - 1)
-		memmove(e, list_set_elem(set, map, i + 1),
-			set->dsize * (map->size - (i + 1)));
+static inline void
+list_set_del(struct ip_set *set, struct set_elem *e)
+{
+	list_del_rcu(&e->list);
+	__list_set_del(set, e);
+}
 
-	/* Last element */
-	e = list_set_elem(set, map, map->size - 1);
-	e->id = IPSET_INVALID_ID;
-	return 0;
+static inline void
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
+{
+	list_replace_rcu(&old->list, &e->list);
+	__list_set_del(set, old);
 }
 
 static void
 set_cleanup_entries(struct ip_set *set)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e;
-	u32 i = 0;
+	struct set_elem *e, *n;
 
-	while (i < map->size) {
-		e = list_set_elem(set, map, i);
-		if (e->id != IPSET_INVALID_ID &&
-		    ip_set_timeout_expired(ext_timeout(e, set)))
-			list_set_del(set, i);
-			/* Check element moved to position i in next loop */
-		else
-			i++;
-	}
+	list_for_each_entry_safe(e, n, &map->members, list)
+		if (ip_set_timeout_expired(ext_timeout(e, set)))
+			list_set_del(set, e);
 }
 
 static int
@@ -250,31 +194,46 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct list_set *map = set->data;
 	struct set_adt_elem *d = value;
-	struct set_elem *e;
-	u32 i;
+	struct set_elem *e, *next, *prev = NULL;
 	int ret;
 
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			return 0;
-		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, set)))
+	list_for_each_entry(e, &map->members, list) {
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
-		else if (e->id != d->id)
+		else if (e->id != d->id) {
+			prev = e;
 			continue;
+		}
 
-		if (d->before == 0)
-			return 1;
-		else if (d->before > 0)
-			ret = id_eq(set, i + 1, d->refid);
-		else
-			ret = i > 0 && id_eq(set, i - 1, d->refid);
+		if (d->before == 0) {
+			ret = 1;
+		} else if (d->before > 0) {
+			next = list_next_entry(e, list);
+			ret = !list_is_last(&e->list, &map->members) &&
+			      next->id == d->refid;
+		} else {
+			ret = prev && prev->id == d->refid;
+		}
 		return ret;
 	}
 	return 0;
 }
 
+static void
+list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+			 struct set_elem *e)
+{
+	if (SET_WITH_COUNTER(set))
+		ip_set_init_counter(ext_counter(e, set), ext);
+	if (SET_WITH_COMMENT(set))
+		ip_set_init_comment(ext_comment(e, set), ext);
+	if (SET_WITH_SKBINFO(set))
+		ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
+	/* Update timeout last */
+	if (SET_WITH_TIMEOUT(set))
+		ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
+}
 
 static int
 list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
@@ -282,60 +241,78 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct list_set *map = set->data;
 	struct set_adt_elem *d = value;
-	struct set_elem *e;
+	struct set_elem *e, *n, *prev, *next;
 	bool flag_exist = flags & IPSET_FLAG_EXIST;
-	u32 i, ret = 0;
 
 	if (SET_WITH_TIMEOUT(set))
 		set_cleanup_entries(set);
 
-	/* Check already added element */
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			goto insert;
-		else if (e->id != d->id)
+	/* Find where to add the new entry */
+	n = prev = next = NULL;
+	list_for_each_entry(e, &map->members, list) {
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
-
-		if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) ||
-		    (d->before < 0 &&
-		     (i == 0 || !id_eq(set, i - 1, d->refid))))
-			/* Before/after doesn't match */
+		else if (d->id == e->id)
+			n = e;
+		else if (d->before == 0 || e->id != d->refid)
+			continue;
+		else if (d->before > 0)
+			next = e;
+		else
+			prev = e;
+	}
+	/* Re-add already existing element */
+	if (n) {
+		if ((d->before > 0 && !next) ||
+		    (d->before < 0 && !prev))
 			return -IPSET_ERR_REF_EXIST;
 		if (!flag_exist)
-			/* Can't re-add */
 			return -IPSET_ERR_EXIST;
 		/* Update extensions */
-		ip_set_ext_destroy(set, e);
+		ip_set_ext_destroy(set, n);
+		list_set_init_extensions(set, ext, n);
 
-		if (SET_WITH_TIMEOUT(set))
-			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
-		if (SET_WITH_COUNTER(set))
-			ip_set_init_counter(ext_counter(e, set), ext);
-		if (SET_WITH_COMMENT(set))
-			ip_set_init_comment(ext_comment(e, set), ext);
-		if (SET_WITH_SKBINFO(set))
-			ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
 		/* Set is already added to the list */
 		ip_set_put_byindex(map->net, d->id);
 		return 0;
 	}
-insert:
-	ret = -IPSET_ERR_LIST_FULL;
-	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
-					     : list_set_add(set, i, d, ext);
-		else if (e->id != d->refid)
-			continue;
-		else if (d->before > 0)
-			ret = list_set_add(set, i, d, ext);
-		else if (i + 1 < map->size)
-			ret = list_set_add(set, i + 1, d, ext);
+	/* Add new entry */
+	if (d->before == 0) {
+		/* Append */
+		n = list_empty(&map->members) ? NULL :
+		    list_last_entry(&map->members, struct set_elem, list);
+	} else if (d->before > 0) {
+		/* Insert after next element */
+		if (!list_is_last(&next->list, &map->members))
+			n = list_next_entry(next, list);
+	} else {
+		/* Insert before prev element */
+		if (prev->list.prev != &map->members)
+			n = list_prev_entry(prev, list);
 	}
+	/* Can we replace a timed out entry? */
+	if (n &&
+	    !(SET_WITH_TIMEOUT(set) &&
+	      ip_set_timeout_expired(ext_timeout(n, set))))
+		n = NULL;
+
+	e = kzalloc(set->dsize, GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	e->id = d->id;
+	INIT_LIST_HEAD(&e->list);
+	list_set_init_extensions(set, ext, e);
+	if (n)
+		list_set_replace(set, e, n);
+	else if (next)
+		list_add_tail_rcu(&e->list, &next->list);
+	else if (prev)
+		list_add_rcu(&e->list, &prev->list);
+	else
+		list_add_tail_rcu(&e->list, &map->members);
 
-	return ret;
+	return 0;
 }
 
 static int
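For orientation between the two hunks: the rewritten list_set_uadd() records in one scan n (an existing entry with the same id), next (the reference entry for "add ... before") and prev (for "add ... after"), then links the new element relative to whichever was found. A standalone doubly linked list model of that before/after placement; the node layout and the -1 error returns are illustrative only:

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *prev, *next;
	int id;
};

static struct node head = { &head, &head, -1 };	/* circular list head */

static void insert_between(struct node *n, struct node *a, struct node *b)
{
	n->prev = a; n->next = b;
	a->next = n; b->prev = n;
}

static struct node *find(int id)
{
	struct node *e;

	for (e = head.next; e != &head; e = e->next)
		if (e->id == id)
			return e;
	return NULL;
}

/* before > 0: insert before refid; before < 0: after refid; 0: append */
static int add(int id, int before, int refid)
{
	struct node *ref = NULL, *n = malloc(sizeof(*n));

	if (!n)
		return -1;
	n->id = id;
	if (before != 0) {
		ref = find(refid);
		if (!ref) {
			free(n);
			return -1;	/* cf. -IPSET_ERR_REF_EXIST */
		}
	}
	if (before > 0)
		insert_between(n, ref->prev, ref);	/* add before */
	else if (before < 0)
		insert_between(n, ref, ref->next);	/* add after */
	else
		insert_between(n, head.prev, &head);	/* append */
	return 0;
}

int main(void)
{
	struct node *e;

	add(1, 0, 0);	/* [1] */
	add(2, 0, 0);	/* [1 2] */
	add(3, 1, 2);	/* before 2: [1 3 2] */
	add(4, -1, 1);	/* after 1:  [1 4 3 2] */
	for (e = head.next; e != &head; e = e->next)
		printf("%d ", e->id);
	printf("\n");
	return 0;
}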
@@ -344,32 +321,30 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct list_set *map = set->data;
 	struct set_adt_elem *d = value;
-	struct set_elem *e;
-	u32 i;
-
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			return d->before != 0 ? -IPSET_ERR_REF_EXIST
-					      : -IPSET_ERR_EXIST;
-		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, set)))
+	struct set_elem *e, *next, *prev = NULL;
+
+	list_for_each_entry(e, &map->members, list) {
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
-		else if (e->id != d->id)
+		else if (e->id != d->id) {
+			prev = e;
 			continue;
+		}
 
-		if (d->before == 0)
-			return list_set_del(set, i);
-		else if (d->before > 0) {
-			if (!id_eq(set, i + 1, d->refid))
+		if (d->before > 0) {
+			next = list_next_entry(e, list);
+			if (list_is_last(&e->list, &map->members) ||
+			    next->id != d->refid)
 				return -IPSET_ERR_REF_EXIST;
-			return list_set_del(set, i);
-		} else if (i == 0 || !id_eq(set, i - 1, d->refid))
-			return -IPSET_ERR_REF_EXIST;
-		else
-			return list_set_del(set, i);
+		} else if (d->before < 0) {
+			if (!prev || prev->id != d->refid)
+				return -IPSET_ERR_REF_EXIST;
+		}
+		list_set_del(set, e);
+		return 0;
 	}
-	return -IPSET_ERR_EXIST;
+	return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST;
 }
 
 static int
@@ -383,19 +358,13 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct ip_set *s;
 	int ret = 0;
 
-	if (unlikely(!tb[IPSET_ATTR_NAME] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
-		return -IPSET_ERR_PROTOCOL;
-
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	if (unlikely(!tb[IPSET_ATTR_NAME] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		return -IPSET_ERR_PROTOCOL;
+
 	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
@@ -410,6 +379,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		e.before = f & IPSET_FLAG_BEFORE;
 	}
 
@@ -447,27 +417,26 @@ static void
 list_set_flush(struct ip_set *set)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e;
-	u32 i;
-
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(set, map, i);
-		if (e->id != IPSET_INVALID_ID) {
-			ip_set_put_byindex(map->net, e->id);
-			ip_set_ext_destroy(set, e);
-			e->id = IPSET_INVALID_ID;
-		}
-	}
+	struct set_elem *e, *n;
+
+	list_for_each_entry_safe(e, n, &map->members, list)
+		list_set_del(set, e);
 }
 
 static void
 list_set_destroy(struct ip_set *set)
 {
 	struct list_set *map = set->data;
+	struct set_elem *e, *n;
 
 	if (SET_WITH_TIMEOUT(set))
 		del_timer_sync(&map->gc);
-	list_set_flush(set);
+	list_for_each_entry_safe(e, n, &map->members, list) {
+		list_del(&e->list);
+		ip_set_put_byindex(map->net, e->id);
+		ip_set_ext_destroy(set, e);
+		kfree(e);
+	}
 	kfree(map);
 
 	set->data = NULL;
@@ -478,6 +447,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct list_set *map = set->data;
 	struct nlattr *nested;
+	struct set_elem *e;
+	u32 n = 0;
+
+	list_for_each_entry(e, &map->members, list)
+		n++;
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
@@ -485,7 +459,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) + map->size * set->dsize)))
+			  htonl(sizeof(*map) + n * set->dsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
@@ -502,18 +476,22 @@ list_set_list(const struct ip_set *set,
 {
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
-	u32 i, first = cb->args[IPSET_CB_ARG0];
-	const struct set_elem *e;
+	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+	struct set_elem *e;
+	int ret = 0;
 
 	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!atd)
 		return -EMSGSIZE;
-	for (; cb->args[IPSET_CB_ARG0] < map->size;
-	     cb->args[IPSET_CB_ARG0]++) {
-		i = cb->args[IPSET_CB_ARG0];
-		e = list_set_elem(set, map, i);
-		if (e->id == IPSET_INVALID_ID)
-			goto finish;
+	list_for_each_entry(e, &map->members, list) {
+		if (i == first)
+			break;
+		i++;
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_from(e, &map->members, list) {
+		i++;
 		if (SET_WITH_TIMEOUT(set) &&
 		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
@@ -521,9 +499,10 @@ list_set_list(const struct ip_set *set,
 		if (!nested) {
 			if (i == first) {
 				nla_nest_cancel(skb, atd);
-				return -EMSGSIZE;
-			} else
-				goto nla_put_failure;
+				ret = -EMSGSIZE;
+				goto out;
+			}
+			goto nla_put_failure;
 		}
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
 				   ip_set_name_byindex(map->net, e->id)))
@@ -532,20 +511,23 @@ list_set_list(const struct ip_set *set,
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
-finish:
+
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
 	cb->args[IPSET_CB_ARG0] = 0;
-	return 0;
+	goto out;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
 	if (unlikely(i == first)) {
 		cb->args[IPSET_CB_ARG0] = 0;
-		return -EMSGSIZE;
+		ret = -EMSGSIZE;
 	}
+	cb->args[IPSET_CB_ARG0] = i - 1;
 	ipset_nest_end(skb, atd);
-	return 0;
+out:
+	rcu_read_unlock();
+	return ret;
 }
 
 static bool
@@ -577,12 +559,12 @@ static const struct ip_set_type_variant set_variant = {
 static void
 list_set_gc(unsigned long ul_set)
 {
-	struct ip_set *set = (struct ip_set *) ul_set;
+	struct ip_set *set = (struct ip_set *)ul_set;
 	struct list_set *map = set->data;
 
-	write_lock_bh(&set->lock);
+	spin_lock_bh(&set->lock);
 	set_cleanup_entries(set);
-	write_unlock_bh(&set->lock);
+	spin_unlock_bh(&set->lock);
 
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
@@ -594,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	struct list_set *map = set->data;
 
 	init_timer(&map->gc);
-	map->gc.data = (unsigned long) set;
+	map->gc.data = (unsigned long)set;
 	map->gc.function = gc;
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
@@ -606,24 +588,16 @@ static bool
 init_list_set(struct net *net, struct ip_set *set, u32 size)
 {
 	struct list_set *map;
-	struct set_elem *e;
-	u32 i;
 
-	map = kzalloc(sizeof(*map) +
-		      min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize,
-		      GFP_KERNEL);
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
 	if (!map)
 		return false;
 
 	map->size = size;
 	map->net = net;
+	INIT_LIST_HEAD(&map->members);
 	set->data = map;
 
-	for (i = 0; i < size; i++) {
-		e = list_set_elem(set, map, i);
-		e->id = IPSET_INVALID_ID;
-	}
-
 	return true;
 }
 
@@ -678,7 +652,8 @@ static struct ip_set_type list_set_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
-		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
+					    .len  = IPSET_MAX_COMMENT_SIZE },
 		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
 		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
@@ -695,6 +670,7 @@ list_set_init(void)
695static void __exit 670static void __exit
696list_set_fini(void) 671list_set_fini(void)
697{ 672{
673 rcu_barrier();
698 ip_set_type_unregister(&list_set_type); 674 ip_set_type_unregister(&list_set_type);
699} 675}
700 676
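
For readability, the list-set changes above boil down to three things: the preallocated member array becomes an RCU-managed linked list that starts out empty, garbage collection runs under the set's spinlock instead of the old rwlock, and module unload now waits for outstanding RCU callbacks before unregistering the type. A condensed view of the resulting init and exit paths, reassembled from the hunks above:

	static bool
	init_list_set(struct net *net, struct ip_set *set, u32 size)
	{
		struct list_set *map;

		/* Members are linked in on demand now, so only the
		 * bookkeeping structure is allocated up front.
		 */
		map = kzalloc(sizeof(*map), GFP_KERNEL);
		if (!map)
			return false;

		map->size = size;
		map->net = net;
		INIT_LIST_HEAD(&map->members);
		set->data = map;

		return true;
	}

	static void __exit
	list_set_fini(void)
	{
		/* Flush pending call_rcu() callbacks before the code
		 * they would run is unloaded.
		 */
		rcu_barrier();
		ip_set_type_unregister(&list_set_type);
	}
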
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 04d15fdc99ee..1c8a42c1056c 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -1,9 +1,7 @@
1#include <linux/export.h> 1#include <linux/export.h>
2#include <linux/netfilter/ipset/pfxlen.h> 2#include <linux/netfilter/ipset/pfxlen.h>
3 3
4/* 4/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
5 * Prefixlen maps for fast conversions, by Jan Engelhardt.
6 */
7 5
8#define E(a, b, c, d) \ 6#define E(a, b, c, d) \
9 {.ip6 = { \ 7 {.ip6 = { \
@@ -11,8 +9,7 @@
11 htonl(c), htonl(d), \ 9 htonl(c), htonl(d), \
12 } } 10 } }
13 11
14/* 12/* This table works for both IPv4 and IPv6;
15 * This table works for both IPv4 and IPv6;
16 * just use prefixlen_netmask_map[prefixlength].ip. 13 * just use prefixlen_netmask_map[prefixlength].ip.
17 */ 14 */
18const union nf_inet_addr ip_set_netmask_map[] = { 15const union nf_inet_addr ip_set_netmask_map[] = {
@@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = {
149EXPORT_SYMBOL_GPL(ip_set_netmask_map); 146EXPORT_SYMBOL_GPL(ip_set_netmask_map);
150 147
151#undef E 148#undef E
152#define E(a, b, c, d) \ 149#define E(a, b, c, d) \
153 {.ip6 = { (__force __be32) a, (__force __be32) b, \ 150 {.ip6 = { (__force __be32)a, (__force __be32)b, \
154 (__force __be32) c, (__force __be32) d, \ 151 (__force __be32)c, (__force __be32)d, \
155 } } 152 } }
156 153
157/* 154/* This table works for both IPv4 and IPv6;
158 * This table works for both IPv4 and IPv6;
159 * just use prefixlen_hostmask_map[prefixlength].ip. 155 * just use prefixlen_hostmask_map[prefixlength].ip.
160 */ 156 */
161const union nf_inet_addr ip_set_hostmask_map[] = { 157const union nf_inet_addr ip_set_hostmask_map[] = {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 19b9cce6c210..b08ba9538d12 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1457,18 +1457,12 @@ static struct socket *make_send_sock(struct net *net, int id)
1457 struct socket *sock; 1457 struct socket *sock;
1458 int result; 1458 int result;
1459 1459
1460 /* First create a socket move it to right name space later */ 1460 /* First create a socket */
1461 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1461 result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1462 if (result < 0) { 1462 if (result < 0) {
1463 pr_err("Error during creation of socket; terminating\n"); 1463 pr_err("Error during creation of socket; terminating\n");
1464 return ERR_PTR(result); 1464 return ERR_PTR(result);
1465 } 1465 }
1466 /*
1467 * Kernel sockets that are a part of a namespace, should not
1468 * hold a reference to a namespace in order to allow to stop it.
1469 * After sk_change_net should be released using sk_release_kernel.
1470 */
1471 sk_change_net(sock->sk, net);
1472 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); 1466 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
1473 if (result < 0) { 1467 if (result < 0) {
1474 pr_err("Error setting outbound mcast interface\n"); 1468 pr_err("Error setting outbound mcast interface\n");
@@ -1497,7 +1491,7 @@ static struct socket *make_send_sock(struct net *net, int id)
1497 return sock; 1491 return sock;
1498 1492
1499error: 1493error:
1500 sk_release_kernel(sock->sk); 1494 sock_release(sock);
1501 return ERR_PTR(result); 1495 return ERR_PTR(result);
1502} 1496}
1503 1497
@@ -1518,17 +1512,11 @@ static struct socket *make_receive_sock(struct net *net, int id)
1518 int result; 1512 int result;
1519 1513
1520 /* First create a socket */ 1514 /* First create a socket */
1521 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1515 result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1522 if (result < 0) { 1516 if (result < 0) {
1523 pr_err("Error during creation of socket; terminating\n"); 1517 pr_err("Error during creation of socket; terminating\n");
1524 return ERR_PTR(result); 1518 return ERR_PTR(result);
1525 } 1519 }
1526 /*
1527 * Kernel sockets that are a part of a namespace, should not
1528 * hold a reference to a namespace in order to allow to stop it.
1529 * After sk_change_net should be released using sk_release_kernel.
1530 */
1531 sk_change_net(sock->sk, net);
1532 /* it is equivalent to the REUSEADDR option in user-space */ 1520 /* it is equivalent to the REUSEADDR option in user-space */
1533 sock->sk->sk_reuse = SK_CAN_REUSE; 1521 sock->sk->sk_reuse = SK_CAN_REUSE;
1534 result = sysctl_sync_sock_size(ipvs); 1522 result = sysctl_sync_sock_size(ipvs);
@@ -1554,7 +1542,7 @@ static struct socket *make_receive_sock(struct net *net, int id)
1554 return sock; 1542 return sock;
1555 1543
1556error: 1544error:
1557 sk_release_kernel(sock->sk); 1545 sock_release(sock);
1558 return ERR_PTR(result); 1546 return ERR_PTR(result);
1559} 1547}
1560 1548
@@ -1692,7 +1680,7 @@ done:
1692 ip_vs_sync_buff_release(sb); 1680 ip_vs_sync_buff_release(sb);
1693 1681
1694 /* release the sending multicast socket */ 1682 /* release the sending multicast socket */
1695 sk_release_kernel(tinfo->sock->sk); 1683 sock_release(tinfo->sock);
1696 kfree(tinfo); 1684 kfree(tinfo);
1697 1685
1698 return 0; 1686 return 0;
@@ -1729,7 +1717,7 @@ static int sync_thread_backup(void *data)
1729 } 1717 }
1730 1718
1731 /* release the sending multicast socket */ 1719 /* release the sending multicast socket */
1732 sk_release_kernel(tinfo->sock->sk); 1720 sock_release(tinfo->sock);
1733 kfree(tinfo->buf); 1721 kfree(tinfo->buf);
1734 kfree(tinfo); 1722 kfree(tinfo);
1735 1723
@@ -1854,11 +1842,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1854 return 0; 1842 return 0;
1855 1843
1856outsocket: 1844outsocket:
1857 sk_release_kernel(sock->sk); 1845 sock_release(sock);
1858 1846
1859outtinfo: 1847outtinfo:
1860 if (tinfo) { 1848 if (tinfo) {
1861 sk_release_kernel(tinfo->sock->sk); 1849 sock_release(tinfo->sock);
1862 kfree(tinfo->buf); 1850 kfree(tinfo->buf);
1863 kfree(tinfo); 1851 kfree(tinfo);
1864 } 1852 }
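
Every ip_vs_sync.c hunk is the same conversion: sock_create_kern() now takes the target namespace directly, which makes the sk_change_net()/sk_release_kernel() pairing obsolete, so plain sock_release() is used on all error and shutdown paths. An abridged view of make_send_sock() after the change (bind/connect steps elided, as in the hunks above):

	static struct socket *make_send_sock(struct net *net, int id)
	{
		struct socket *sock;
		int result;

		/* The socket is created directly in the target namespace,
		 * so no sk_change_net() fixup is needed afterwards.
		 */
		result = sock_create_kern(net, PF_INET, SOCK_DGRAM,
					  IPPROTO_UDP, &sock);
		if (result < 0) {
			pr_err("Error during creation of socket; terminating\n");
			return ERR_PTR(result);
		}

		/* ... multicast interface setup and bind as before ... */

		return sock;

	error:
		sock_release(sock);	/* was sk_release_kernel(sock->sk) */
		return ERR_PTR(result);
	}
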
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 19986ec5f21a..bf66a8657a5f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -364,13 +364,16 @@ err_unreach:
364#ifdef CONFIG_IP_VS_IPV6 364#ifdef CONFIG_IP_VS_IPV6
365static struct dst_entry * 365static struct dst_entry *
366__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 366__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
367 struct in6_addr *ret_saddr, int do_xfrm) 367 struct in6_addr *ret_saddr, int do_xfrm, int rt_mode)
368{ 368{
369 struct dst_entry *dst; 369 struct dst_entry *dst;
370 struct flowi6 fl6 = { 370 struct flowi6 fl6 = {
371 .daddr = *daddr, 371 .daddr = *daddr,
372 }; 372 };
373 373
374 if (rt_mode & IP_VS_RT_MODE_KNOWN_NH)
375 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
376
374 dst = ip6_route_output(net, NULL, &fl6); 377 dst = ip6_route_output(net, NULL, &fl6);
375 if (dst->error) 378 if (dst->error)
376 goto out_err; 379 goto out_err;
@@ -427,7 +430,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
427 } 430 }
428 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 431 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
429 &dest_dst->dst_saddr.in6, 432 &dest_dst->dst_saddr.in6,
430 do_xfrm); 433 do_xfrm, rt_mode);
431 if (!dst) { 434 if (!dst) {
432 __ip_vs_dst_set(dest, NULL, NULL, 0); 435 __ip_vs_dst_set(dest, NULL, NULL, 0);
433 spin_unlock_bh(&dest->dst_lock); 436 spin_unlock_bh(&dest->dst_lock);
@@ -435,7 +438,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
435 goto err_unreach; 438 goto err_unreach;
436 } 439 }
437 rt = (struct rt6_info *) dst; 440 rt = (struct rt6_info *) dst;
438 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 441 cookie = rt6_get_cookie(rt);
439 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); 442 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
440 spin_unlock_bh(&dest->dst_lock); 443 spin_unlock_bh(&dest->dst_lock);
441 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 444 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
@@ -446,7 +449,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
446 *ret_saddr = dest_dst->dst_saddr.in6; 449 *ret_saddr = dest_dst->dst_saddr.in6;
447 } else { 450 } else {
448 noref = 0; 451 noref = 0;
449 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); 452 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
453 rt_mode);
450 if (!dst) 454 if (!dst)
451 goto err_unreach; 455 goto err_unreach;
452 rt = (struct rt6_info *) dst; 456 rt = (struct rt6_info *) dst;
@@ -781,7 +785,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
781 785
782 /* From world but DNAT to loopback address? */ 786 /* From world but DNAT to loopback address? */
783 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 787 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
784 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 788 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
785 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, 789 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
786 "ip_vs_nat_xmit_v6(): " 790 "ip_vs_nat_xmit_v6(): "
787 "stopping DNAT to loopback address"); 791 "stopping DNAT to loopback address");
@@ -1164,7 +1168,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1164 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1168 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
1165 NULL, ipvsh, 0, 1169 NULL, ipvsh, 0,
1166 IP_VS_RT_MODE_LOCAL | 1170 IP_VS_RT_MODE_LOCAL |
1167 IP_VS_RT_MODE_NON_LOCAL); 1171 IP_VS_RT_MODE_NON_LOCAL |
1172 IP_VS_RT_MODE_KNOWN_NH);
1168 if (local < 0) 1173 if (local < 0)
1169 goto tx_error; 1174 goto tx_error;
1170 if (local) { 1175 if (local) {
@@ -1346,7 +1351,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1346 1351
1347 /* From world but DNAT to loopback address? */ 1352 /* From world but DNAT to loopback address? */
1348 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 1353 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1349 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 1354 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
1350 IP_VS_DBG(1, "%s(): " 1355 IP_VS_DBG(1, "%s(): "
1351 "stopping DNAT to loopback %pI6\n", 1356 "stopping DNAT to loopback %pI6\n",
1352 __func__, &cp->daddr.in6); 1357 __func__, &cp->daddr.in6);
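
The xmit changes thread the caller's rt_mode down into the IPv6 route lookup so that direct-routing mode can tell the routing code to keep the nexthop it was handed rather than recomputing one. A condensed sketch of the resulting lookup (error path simplified; saddr handling unchanged):

	static struct dst_entry *
	__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
				struct in6_addr *ret_saddr, int do_xfrm,
				int rt_mode)
	{
		struct dst_entry *dst;
		struct flowi6 fl6 = {
			.daddr = *daddr,
		};

		/* DR mode routes to a known next hop (the real server). */
		if (rt_mode & IP_VS_RT_MODE_KNOWN_NH)
			fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

		dst = ip6_route_output(net, NULL, &fl6);
		if (dst->error) {
			dst_release(dst);
			return NULL;
		}
		return dst;
	}

ip_vs_dr_xmit_v6() correspondingly adds IP_VS_RT_MODE_KNOWN_NH to its mode mask, and xt_TEE's tee_tg_route6() later in this diff sets the same FLOWI_FLAG_KNOWN_NH for its forced gateway.
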
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 1d69f5b9748f..9511af04dc81 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -779,8 +779,8 @@ static int callforward_do_filter(struct net *net,
779 flowi6_to_flowi(&fl1), false)) { 779 flowi6_to_flowi(&fl1), false)) {
780 if (!afinfo->route(net, (struct dst_entry **)&rt2, 780 if (!afinfo->route(net, (struct dst_entry **)&rt2,
781 flowi6_to_flowi(&fl2), false)) { 781 flowi6_to_flowi(&fl2), false)) {
782 if (ipv6_addr_equal(rt6_nexthop(rt1), 782 if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
783 rt6_nexthop(rt2)) && 783 rt6_nexthop(rt2, &fl2.daddr)) &&
784 rt1->dst.dev == rt2->dst.dev) 784 rt1->dst.dev == rt2->dst.dev)
785 ret = 1; 785 ret = 1;
786 dst_release(&rt2->dst); 786 dst_release(&rt2->dst);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 60865f110309..2281be419a74 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -90,7 +90,13 @@ static int generic_packet(struct nf_conn *ct,
90static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, 90static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
91 unsigned int dataoff, unsigned int *timeouts) 91 unsigned int dataoff, unsigned int *timeouts)
92{ 92{
93 return nf_generic_should_process(nf_ct_protonum(ct)); 93 bool ret;
94
95 ret = nf_generic_should_process(nf_ct_protonum(ct));
96 if (!ret)
97 pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
98 nf_ct_protonum(ct));
99 return ret;
94} 100}
95 101
96#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 102#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index ea7f36784b3d..399210693c2a 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -19,6 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
19/* nf_queue.c */ 19/* nf_queue.c */
20int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, 20int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
21 struct nf_hook_state *state, unsigned int queuenum); 21 struct nf_hook_state *state, unsigned int queuenum);
22void nf_queue_nf_hook_drop(struct nf_hook_ops *ops);
22int __init netfilter_queue_init(void); 23int __init netfilter_queue_init(void);
23 24
24/* nf_log.c */ 25/* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 2e88032cd5ad..8a8b2abc35ff 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -105,6 +105,23 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
105} 105}
106EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); 106EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
107 107
108void nf_queue_nf_hook_drop(struct nf_hook_ops *ops)
109{
110 const struct nf_queue_handler *qh;
111 struct net *net;
112
113 rtnl_lock();
114 rcu_read_lock();
115 qh = rcu_dereference(queue_handler);
116 if (qh) {
117 for_each_net(net) {
118 qh->nf_hook_drop(net, ops);
119 }
120 }
121 rcu_read_unlock();
122 rtnl_unlock();
123}
124
108/* 125/*
109 * Any packet that leaves via this function must come back 126 * Any packet that leaves via this function must come back
110 * through nf_reinject(). 127 * through nf_reinject().
@@ -196,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
196 213
197 if (verdict == NF_ACCEPT) { 214 if (verdict == NF_ACCEPT) {
198 next_hook: 215 next_hook:
199 verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], 216 verdict = nf_iterate(entry->state.hook_list,
200 skb, &entry->state, &elem); 217 skb, &entry->state, &elem);
201 } 218 }
202 219
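
The new nf_queue_nf_hook_drop() lets the queueing backend flush packets that were queued via a hook which is about to disappear: it walks every network namespace (under rtnl, which protects that list) and forwards to the handler's new nf_hook_drop callback. The nfnetlink_queue hunks later in this diff provide the first implementation and register it as:

	static const struct nf_queue_handler nfqh = {
		.outfn		= &nfqnl_enqueue_packet,
		.nf_hook_drop	= &nfqnl_nf_hook_drop,
	};
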
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 52e20c9a46a5..789feeae6c44 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -11,6 +11,7 @@
11#include <asm/unaligned.h> 11#include <asm/unaligned.h>
12#include <net/tcp.h> 12#include <net/tcp.h>
13#include <net/netns/generic.h> 13#include <net/netns/generic.h>
14#include <linux/proc_fs.h>
14 15
15#include <linux/netfilter_ipv4/ip_tables.h> 16#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter/x_tables.h> 17#include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 34ded09317e7..cfe636808541 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -127,13 +127,46 @@ static void nft_trans_destroy(struct nft_trans *trans)
127 kfree(trans); 127 kfree(trans);
128} 128}
129 129
130int nft_register_basechain(struct nft_base_chain *basechain,
131 unsigned int hook_nops)
132{
133 if (basechain->flags & NFT_BASECHAIN_DISABLED)
134 return 0;
135
136 return nf_register_hooks(basechain->ops, hook_nops);
137}
138EXPORT_SYMBOL_GPL(nft_register_basechain);
139
140void nft_unregister_basechain(struct nft_base_chain *basechain,
141 unsigned int hook_nops)
142{
143 if (basechain->flags & NFT_BASECHAIN_DISABLED)
144 return;
145
146 nf_unregister_hooks(basechain->ops, hook_nops);
147}
148EXPORT_SYMBOL_GPL(nft_unregister_basechain);
149
150static int nf_tables_register_hooks(const struct nft_table *table,
151 struct nft_chain *chain,
152 unsigned int hook_nops)
153{
154 if (table->flags & NFT_TABLE_F_DORMANT ||
155 !(chain->flags & NFT_BASE_CHAIN))
156 return 0;
157
158 return nft_register_basechain(nft_base_chain(chain), hook_nops);
159}
160
130static void nf_tables_unregister_hooks(const struct nft_table *table, 161static void nf_tables_unregister_hooks(const struct nft_table *table,
131 const struct nft_chain *chain, 162 struct nft_chain *chain,
132 unsigned int hook_nops) 163 unsigned int hook_nops)
133{ 164{
134 if (!(table->flags & NFT_TABLE_F_DORMANT) && 165 if (table->flags & NFT_TABLE_F_DORMANT ||
135 chain->flags & NFT_BASE_CHAIN) 166 !(chain->flags & NFT_BASE_CHAIN))
136 nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops); 167 return;
168
169 nft_unregister_basechain(nft_base_chain(chain), hook_nops);
137} 170}
138 171
139/* Internal table flags */ 172/* Internal table flags */
@@ -560,7 +593,7 @@ static int nf_tables_table_enable(const struct nft_af_info *afi,
560 if (!(chain->flags & NFT_BASE_CHAIN)) 593 if (!(chain->flags & NFT_BASE_CHAIN))
561 continue; 594 continue;
562 595
563 err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); 596 err = nft_register_basechain(nft_base_chain(chain), afi->nops);
564 if (err < 0) 597 if (err < 0)
565 goto err; 598 goto err;
566 599
@@ -575,20 +608,20 @@ err:
575 if (i-- <= 0) 608 if (i-- <= 0)
576 break; 609 break;
577 610
578 nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); 611 nft_unregister_basechain(nft_base_chain(chain), afi->nops);
579 } 612 }
580 return err; 613 return err;
581} 614}
582 615
583static void nf_tables_table_disable(const struct nft_af_info *afi, 616static void nf_tables_table_disable(const struct nft_af_info *afi,
584 struct nft_table *table) 617 struct nft_table *table)
585{ 618{
586 struct nft_chain *chain; 619 struct nft_chain *chain;
587 620
588 list_for_each_entry(chain, &table->chains, list) { 621 list_for_each_entry(chain, &table->chains, list) {
589 if (chain->flags & NFT_BASE_CHAIN) 622 if (chain->flags & NFT_BASE_CHAIN)
590 nf_unregister_hooks(nft_base_chain(chain)->ops, 623 nft_unregister_basechain(nft_base_chain(chain),
591 afi->nops); 624 afi->nops);
592 } 625 }
593} 626}
594 627
@@ -679,13 +712,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
679 return -EINVAL; 712 return -EINVAL;
680 } 713 }
681 714
715 err = -EAFNOSUPPORT;
682 if (!try_module_get(afi->owner)) 716 if (!try_module_get(afi->owner))
683 return -EAFNOSUPPORT; 717 goto err1;
684 718
685 err = -ENOMEM; 719 err = -ENOMEM;
686 table = kzalloc(sizeof(*table), GFP_KERNEL); 720 table = kzalloc(sizeof(*table), GFP_KERNEL);
687 if (table == NULL) 721 if (table == NULL)
688 goto err1; 722 goto err2;
689 723
690 nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); 724 nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
691 INIT_LIST_HEAD(&table->chains); 725 INIT_LIST_HEAD(&table->chains);
@@ -695,14 +729,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
695 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); 729 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
696 err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); 730 err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
697 if (err < 0) 731 if (err < 0)
698 goto err2; 732 goto err3;
699 733
700 list_add_tail_rcu(&table->list, &afi->tables); 734 list_add_tail_rcu(&table->list, &afi->tables);
701 return 0; 735 return 0;
702err2: 736err3:
703 kfree(table); 737 kfree(table);
704err1: 738err2:
705 module_put(afi->owner); 739 module_put(afi->owner);
740err1:
706 return err; 741 return err;
707} 742}
708 743
@@ -881,6 +916,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
881static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { 916static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
882 [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, 917 [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
883 [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, 918 [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
919 [NFTA_HOOK_DEV] = { .type = NLA_STRING,
920 .len = IFNAMSIZ - 1 },
884}; 921};
885 922
886static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) 923static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
@@ -954,6 +991,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
954 goto nla_put_failure; 991 goto nla_put_failure;
955 if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) 992 if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
956 goto nla_put_failure; 993 goto nla_put_failure;
994 if (basechain->dev_name[0] &&
995 nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
996 goto nla_put_failure;
957 nla_nest_end(skb, nest); 997 nla_nest_end(skb, nest);
958 998
959 if (nla_put_be32(skb, NFTA_CHAIN_POLICY, 999 if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -1165,9 +1205,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
1165 BUG_ON(chain->use > 0); 1205 BUG_ON(chain->use > 0);
1166 1206
1167 if (chain->flags & NFT_BASE_CHAIN) { 1207 if (chain->flags & NFT_BASE_CHAIN) {
1168 module_put(nft_base_chain(chain)->type->owner); 1208 struct nft_base_chain *basechain = nft_base_chain(chain);
1169 free_percpu(nft_base_chain(chain)->stats); 1209
1170 kfree(nft_base_chain(chain)); 1210 module_put(basechain->type->owner);
1211 free_percpu(basechain->stats);
1212 if (basechain->ops[0].dev != NULL)
1213 dev_put(basechain->ops[0].dev);
1214 kfree(basechain);
1171 } else { 1215 } else {
1172 kfree(chain); 1216 kfree(chain);
1173 } 1217 }
@@ -1186,6 +1230,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1186 struct nlattr *ha[NFTA_HOOK_MAX + 1]; 1230 struct nlattr *ha[NFTA_HOOK_MAX + 1];
1187 struct net *net = sock_net(skb->sk); 1231 struct net *net = sock_net(skb->sk);
1188 int family = nfmsg->nfgen_family; 1232 int family = nfmsg->nfgen_family;
1233 struct net_device *dev = NULL;
1189 u8 policy = NF_ACCEPT; 1234 u8 policy = NF_ACCEPT;
1190 u64 handle = 0; 1235 u64 handle = 0;
1191 unsigned int i; 1236 unsigned int i;
@@ -1325,17 +1370,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1325 return -ENOENT; 1370 return -ENOENT;
1326 hookfn = type->hooks[hooknum]; 1371 hookfn = type->hooks[hooknum];
1327 1372
1373 if (afi->flags & NFT_AF_NEEDS_DEV) {
1374 char ifname[IFNAMSIZ];
1375
1376 if (!ha[NFTA_HOOK_DEV]) {
1377 module_put(type->owner);
1378 return -EOPNOTSUPP;
1379 }
1380
1381 nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
1382 dev = dev_get_by_name(net, ifname);
1383 if (!dev) {
1384 module_put(type->owner);
1385 return -ENOENT;
1386 }
1387 } else if (ha[NFTA_HOOK_DEV]) {
1388 module_put(type->owner);
1389 return -EOPNOTSUPP;
1390 }
1391
1328 basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); 1392 basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
1329 if (basechain == NULL) { 1393 if (basechain == NULL) {
1330 module_put(type->owner); 1394 module_put(type->owner);
1395 if (dev != NULL)
1396 dev_put(dev);
1331 return -ENOMEM; 1397 return -ENOMEM;
1332 } 1398 }
1333 1399
1400 if (dev != NULL)
1401 strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
1402
1334 if (nla[NFTA_CHAIN_COUNTERS]) { 1403 if (nla[NFTA_CHAIN_COUNTERS]) {
1335 stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); 1404 stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
1336 if (IS_ERR(stats)) { 1405 if (IS_ERR(stats)) {
1337 module_put(type->owner); 1406 module_put(type->owner);
1338 kfree(basechain); 1407 kfree(basechain);
1408 if (dev != NULL)
1409 dev_put(dev);
1339 return PTR_ERR(stats); 1410 return PTR_ERR(stats);
1340 } 1411 }
1341 basechain->stats = stats; 1412 basechain->stats = stats;
@@ -1344,6 +1415,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1344 if (stats == NULL) { 1415 if (stats == NULL) {
1345 module_put(type->owner); 1416 module_put(type->owner);
1346 kfree(basechain); 1417 kfree(basechain);
1418 if (dev != NULL)
1419 dev_put(dev);
1347 return -ENOMEM; 1420 return -ENOMEM;
1348 } 1421 }
1349 rcu_assign_pointer(basechain->stats, stats); 1422 rcu_assign_pointer(basechain->stats, stats);
@@ -1361,6 +1434,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1361 ops->priority = priority; 1434 ops->priority = priority;
1362 ops->priv = chain; 1435 ops->priv = chain;
1363 ops->hook = afi->hooks[ops->hooknum]; 1436 ops->hook = afi->hooks[ops->hooknum];
1437 ops->dev = dev;
1364 if (hookfn) 1438 if (hookfn)
1365 ops->hook = hookfn; 1439 ops->hook = hookfn;
1366 if (afi->hook_ops_init) 1440 if (afi->hook_ops_init)
@@ -1380,12 +1454,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1380 chain->table = table; 1454 chain->table = table;
1381 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); 1455 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
1382 1456
1383 if (!(table->flags & NFT_TABLE_F_DORMANT) && 1457 err = nf_tables_register_hooks(table, chain, afi->nops);
1384 chain->flags & NFT_BASE_CHAIN) { 1458 if (err < 0)
1385 err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); 1459 goto err1;
1386 if (err < 0)
1387 goto err1;
1388 }
1389 1460
1390 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); 1461 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1391 err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); 1462 err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
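
Most of the nf_tables_api.c churn funnels base-chain hook registration through the two new exported helpers, which quietly do nothing while a chain carries NFT_BASECHAIN_DISABLED (its bound device is currently absent); chain creation, table wake-up and the netdev notifier below all share them. The register side in full:

	int nft_register_basechain(struct nft_base_chain *basechain,
				   unsigned int hook_nops)
	{
		/* Device-less chain: the NETDEV_REGISTER notifier
		 * will register the hooks once the device appears.
		 */
		if (basechain->flags & NFT_BASECHAIN_DISABLED)
			return 0;

		return nf_register_hooks(basechain->ops, hook_nops);
	}
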
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f153b07073af..f77bad46ac68 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,7 +114,8 @@ unsigned int
114nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) 114nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
115{ 115{
116 const struct nft_chain *chain = ops->priv, *basechain = chain; 116 const struct nft_chain *chain = ops->priv, *basechain = chain;
117 const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); 117 const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet);
118 const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
118 const struct nft_rule *rule; 119 const struct nft_rule *rule;
119 const struct nft_expr *expr, *last; 120 const struct nft_expr *expr, *last;
120 struct nft_regs regs; 121 struct nft_regs regs;
@@ -124,6 +125,10 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
124 int rulenum; 125 int rulenum;
125 unsigned int gencursor = nft_genmask_cur(net); 126 unsigned int gencursor = nft_genmask_cur(net);
126 127
128 /* Ignore chains that are not for the current network namespace */
129 if (!net_eq(net, chain_net))
130 return NF_ACCEPT;
131
127do_chain: 132do_chain:
128 rulenum = 0; 133 rulenum = 0;
129 rule = list_entry(&chain->rules, struct nft_rule, list); 134 rule = list_entry(&chain->rules, struct nft_rule, list);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
new file mode 100644
index 000000000000..2cae4d4a03b7
--- /dev/null
+++ b/net/netfilter/nf_tables_netdev.c
@@ -0,0 +1,258 @@
1/*
2 * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/netdevice.h>
12#include <net/netfilter/nf_tables.h>
13#include <linux/ip.h>
14#include <linux/ipv6.h>
15#include <net/netfilter/nf_tables_ipv4.h>
16#include <net/netfilter/nf_tables_ipv6.h>
17
18static inline void
19nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
20 const struct nf_hook_ops *ops, struct sk_buff *skb,
21 const struct nf_hook_state *state)
22{
23 struct iphdr *iph, _iph;
24 u32 len, thoff;
25
26 nft_set_pktinfo(pkt, ops, skb, state);
27
28 iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
29 &_iph);
30 if (!iph)
31 return;
32
33 iph = ip_hdr(skb);
34 if (iph->ihl < 5 || iph->version != 4)
35 return;
36
37 len = ntohs(iph->tot_len);
38 thoff = iph->ihl * 4;
39 if (skb->len < len)
40 return;
41 else if (len < thoff)
42 return;
43
44 pkt->tprot = iph->protocol;
45 pkt->xt.thoff = thoff;
46 pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
47}
48
49static inline void
50__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
51 const struct nf_hook_ops *ops,
52 struct sk_buff *skb,
53 const struct nf_hook_state *state)
54{
55#if IS_ENABLED(CONFIG_IPV6)
56 struct ipv6hdr *ip6h, _ip6h;
57 unsigned int thoff = 0;
58 unsigned short frag_off;
59 int protohdr;
60 u32 pkt_len;
61
62 ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
63 &_ip6h);
64 if (!ip6h)
65 return;
66
67 if (ip6h->version != 6)
68 return;
69
70 pkt_len = ntohs(ip6h->payload_len);
71 if (pkt_len + sizeof(*ip6h) > skb->len)
72 return;
73
74 protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
75 if (protohdr < 0)
76 return;
77
78 pkt->tprot = protohdr;
79 pkt->xt.thoff = thoff;
80 pkt->xt.fragoff = frag_off;
81#endif
82}
83
84static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
85 const struct nf_hook_ops *ops,
86 struct sk_buff *skb,
87 const struct nf_hook_state *state)
88{
89 nft_set_pktinfo(pkt, ops, skb, state);
90 __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state);
91}
92
93static unsigned int
94nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb,
95 const struct nf_hook_state *state)
96{
97 struct nft_pktinfo pkt;
98
99 switch (eth_hdr(skb)->h_proto) {
100 case htons(ETH_P_IP):
101 nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state);
102 break;
103 case htons(ETH_P_IPV6):
104 nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state);
105 break;
106 default:
107 nft_set_pktinfo(&pkt, ops, skb, state);
108 break;
109 }
110
111 return nft_do_chain(&pkt, ops);
112}
113
114static struct nft_af_info nft_af_netdev __read_mostly = {
115 .family = NFPROTO_NETDEV,
116 .nhooks = NF_NETDEV_NUMHOOKS,
117 .owner = THIS_MODULE,
118 .flags = NFT_AF_NEEDS_DEV,
119 .nops = 1,
120 .hooks = {
121 [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
122 },
123};
124
125static int nf_tables_netdev_init_net(struct net *net)
126{
127 net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
128 if (net->nft.netdev == NULL)
129 return -ENOMEM;
130
131 memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
132
133 if (nft_register_afinfo(net, net->nft.netdev) < 0)
134 goto err;
135
136 return 0;
137err:
138 kfree(net->nft.netdev);
139 return -ENOMEM;
140}
141
142static void nf_tables_netdev_exit_net(struct net *net)
143{
144 nft_unregister_afinfo(net->nft.netdev);
145 kfree(net->nft.netdev);
146}
147
148static struct pernet_operations nf_tables_netdev_net_ops = {
149 .init = nf_tables_netdev_init_net,
150 .exit = nf_tables_netdev_exit_net,
151};
152
153static const struct nf_chain_type nft_filter_chain_netdev = {
154 .name = "filter",
155 .type = NFT_CHAIN_T_DEFAULT,
156 .family = NFPROTO_NETDEV,
157 .owner = THIS_MODULE,
158 .hook_mask = (1 << NF_NETDEV_INGRESS),
159};
160
161static void nft_netdev_event(unsigned long event, struct nft_af_info *afi,
162 struct net_device *dev, struct nft_table *table,
163 struct nft_base_chain *basechain)
164{
165 switch (event) {
166 case NETDEV_REGISTER:
167 if (strcmp(basechain->dev_name, dev->name) != 0)
168 return;
169
170 BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED));
171
172 dev_hold(dev);
173 basechain->ops[0].dev = dev;
174 basechain->flags &= ~NFT_BASECHAIN_DISABLED;
175 if (!(table->flags & NFT_TABLE_F_DORMANT))
176 nft_register_basechain(basechain, afi->nops);
177 break;
178 case NETDEV_UNREGISTER:
179 if (strcmp(basechain->dev_name, dev->name) != 0)
180 return;
181
182 BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED);
183
184 if (!(table->flags & NFT_TABLE_F_DORMANT))
185 nft_unregister_basechain(basechain, afi->nops);
186
187 dev_put(basechain->ops[0].dev);
188 basechain->ops[0].dev = NULL;
189 basechain->flags |= NFT_BASECHAIN_DISABLED;
190 break;
191 case NETDEV_CHANGENAME:
192 if (dev->ifindex != basechain->ops[0].dev->ifindex)
193 return;
194
195 strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
196 break;
197 }
198}
199
200static int nf_tables_netdev_event(struct notifier_block *this,
201 unsigned long event, void *ptr)
202{
203 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
204 struct nft_af_info *afi;
205 struct nft_table *table;
206 struct nft_chain *chain;
207
208 nfnl_lock(NFNL_SUBSYS_NFTABLES);
209 list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
210 if (afi->family != NFPROTO_NETDEV)
211 continue;
212
213 list_for_each_entry(table, &afi->tables, list) {
214 list_for_each_entry(chain, &table->chains, list) {
215 if (!(chain->flags & NFT_BASE_CHAIN))
216 continue;
217
218 nft_netdev_event(event, afi, dev, table,
219 nft_base_chain(chain));
220 }
221 }
222 }
223 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
224
225 return NOTIFY_DONE;
226}
227
228static struct notifier_block nf_tables_netdev_notifier = {
229 .notifier_call = nf_tables_netdev_event,
230};
231
232static int __init nf_tables_netdev_init(void)
233{
234 int ret;
235
236 nft_register_chain_type(&nft_filter_chain_netdev);
237 ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
238 if (ret < 0)
239 nft_unregister_chain_type(&nft_filter_chain_netdev);
240
241 register_netdevice_notifier(&nf_tables_netdev_notifier);
242
243 return ret;
244}
245
246static void __exit nf_tables_netdev_exit(void)
247{
248 unregister_netdevice_notifier(&nf_tables_netdev_notifier);
249 unregister_pernet_subsys(&nf_tables_netdev_net_ops);
250 nft_unregister_chain_type(&nft_filter_chain_netdev);
251}
252
253module_init(nf_tables_netdev_init);
254module_exit(nf_tables_netdev_exit);
255
256MODULE_LICENSE("GPL");
257MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
258MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */
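
Chain liveness in the new netdev family is tied to the named device, so the notifier above acts as the state machine that flips NFT_BASECHAIN_DISABLED and registers or unregisters the ingress hook. Stripped to its NETDEV_REGISTER arm:

	case NETDEV_REGISTER:
		if (strcmp(basechain->dev_name, dev->name) != 0)
			return;

		/* The chain regains its device: pin it, clear the
		 * disabled flag and rearm the ingress hook unless
		 * the table is dormant.
		 */
		dev_hold(dev);
		basechain->ops[0].dev = dev;
		basechain->flags &= ~NFT_BASECHAIN_DISABLED;
		if (!(table->flags & NFT_TABLE_F_DORMANT))
			nft_register_basechain(basechain, afi->nops);
		break;
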
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8b117c90ecd7..0c0e8ecf02ab 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
269 } 269 }
270} 270}
271 271
272enum {
273 NFNL_BATCH_FAILURE = (1 << 0),
274 NFNL_BATCH_DONE = (1 << 1),
275 NFNL_BATCH_REPLAY = (1 << 2),
276};
277
272static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, 278static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
273 u_int16_t subsys_id) 279 u_int16_t subsys_id)
274{ 280{
@@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
276 struct net *net = sock_net(skb->sk); 282 struct net *net = sock_net(skb->sk);
277 const struct nfnetlink_subsystem *ss; 283 const struct nfnetlink_subsystem *ss;
278 const struct nfnl_callback *nc; 284 const struct nfnl_callback *nc;
279 bool success = true, done = false;
280 static LIST_HEAD(err_list); 285 static LIST_HEAD(err_list);
286 u32 status;
281 int err; 287 int err;
282 288
283 if (subsys_id >= NFNL_SUBSYS_COUNT) 289 if (subsys_id >= NFNL_SUBSYS_COUNT)
284 return netlink_ack(skb, nlh, -EINVAL); 290 return netlink_ack(skb, nlh, -EINVAL);
285replay: 291replay:
292 status = 0;
293
286 skb = netlink_skb_clone(oskb, GFP_KERNEL); 294 skb = netlink_skb_clone(oskb, GFP_KERNEL);
287 if (!skb) 295 if (!skb)
288 return netlink_ack(oskb, nlh, -ENOMEM); 296 return netlink_ack(oskb, nlh, -ENOMEM);
@@ -336,10 +344,10 @@ replay:
336 if (type == NFNL_MSG_BATCH_BEGIN) { 344 if (type == NFNL_MSG_BATCH_BEGIN) {
337 /* Malformed: Batch begin twice */ 345 /* Malformed: Batch begin twice */
338 nfnl_err_reset(&err_list); 346 nfnl_err_reset(&err_list);
339 success = false; 347 status |= NFNL_BATCH_FAILURE;
340 goto done; 348 goto done;
341 } else if (type == NFNL_MSG_BATCH_END) { 349 } else if (type == NFNL_MSG_BATCH_END) {
342 done = true; 350 status |= NFNL_BATCH_DONE;
343 goto done; 351 goto done;
344 } else if (type < NLMSG_MIN_TYPE) { 352 } else if (type < NLMSG_MIN_TYPE) {
345 err = -EINVAL; 353 err = -EINVAL;
@@ -382,11 +390,8 @@ replay:
382 * original skb. 390 * original skb.
383 */ 391 */
384 if (err == -EAGAIN) { 392 if (err == -EAGAIN) {
385 nfnl_err_reset(&err_list); 393 status |= NFNL_BATCH_REPLAY;
386 ss->abort(oskb); 394 goto next;
387 nfnl_unlock(subsys_id);
388 kfree_skb(skb);
389 goto replay;
390 } 395 }
391 } 396 }
392ack: 397ack:
@@ -402,7 +407,7 @@ ack:
402 */ 407 */
403 nfnl_err_reset(&err_list); 408 nfnl_err_reset(&err_list);
404 netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); 409 netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
405 success = false; 410 status |= NFNL_BATCH_FAILURE;
406 goto done; 411 goto done;
407 } 412 }
408 /* We don't stop processing the batch on errors, thus, 413 /* We don't stop processing the batch on errors, thus,
@@ -410,19 +415,26 @@ ack:
410 * triggers. 415 * triggers.
411 */ 416 */
412 if (err) 417 if (err)
413 success = false; 418 status |= NFNL_BATCH_FAILURE;
414 } 419 }
415 420next:
416 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 421 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
417 if (msglen > skb->len) 422 if (msglen > skb->len)
418 msglen = skb->len; 423 msglen = skb->len;
419 skb_pull(skb, msglen); 424 skb_pull(skb, msglen);
420 } 425 }
421done: 426done:
422 if (success && done) 427 if (status & NFNL_BATCH_REPLAY) {
428 ss->abort(oskb);
429 nfnl_err_reset(&err_list);
430 nfnl_unlock(subsys_id);
431 kfree_skb(skb);
432 goto replay;
433 } else if (status == NFNL_BATCH_DONE) {
423 ss->commit(oskb); 434 ss->commit(oskb);
424 else 435 } else {
425 ss->abort(oskb); 436 ss->abort(oskb);
437 }
426 438
427 nfnl_err_deliver(&err_list, oskb); 439 nfnl_err_deliver(&err_list, oskb);
428 nfnl_unlock(subsys_id); 440 nfnl_unlock(subsys_id);
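
The batch receiver swaps its success/done bool pair for a status bitmask, mainly so that a subsystem's -EAGAIN can be recorded and replayed once the whole walk finishes instead of restarting mid-loop. The end-of-batch decision now reads:

	done:
		if (status & NFNL_BATCH_REPLAY) {
			/* Some message returned -EAGAIN: abort, drop the
			 * collected errors and reparse the original skb.
			 */
			ss->abort(oskb);
			nfnl_err_reset(&err_list);
			nfnl_unlock(subsys_id);
			kfree_skb(skb);
			goto replay;
		} else if (status == NFNL_BATCH_DONE) {
			/* Batch end seen and no failure bit set: commit. */
			ss->commit(oskb);
		} else {
			ss->abort(oskb);
		}
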
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4ef1fae8445e..4670821b569d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -598,8 +598,6 @@ nla_put_failure:
598 return -1; 598 return -1;
599} 599}
600 600
601#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
602
603static struct nf_loginfo default_loginfo = { 601static struct nf_loginfo default_loginfo = {
604 .type = NF_LOG_TYPE_ULOG, 602 .type = NF_LOG_TYPE_ULOG,
605 .u = { 603 .u = {
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 11c7682fa0ea..685cc6a17163 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -278,6 +278,23 @@ nla_put_failure:
278 return -1; 278 return -1;
279} 279}
280 280
281static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
282{
283 u32 seclen = 0;
284#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
285 if (!skb || !sk_fullsock(skb->sk))
286 return 0;
287
288 read_lock_bh(&skb->sk->sk_callback_lock);
289
290 if (skb->secmark)
291 security_secid_to_secctx(skb->secmark, secdata, &seclen);
292
293 read_unlock_bh(&skb->sk->sk_callback_lock);
294#endif
295 return seclen;
296}
297
281static struct sk_buff * 298static struct sk_buff *
282nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, 299nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
283 struct nf_queue_entry *entry, 300 struct nf_queue_entry *entry,
@@ -297,6 +314,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
297 struct nf_conn *ct = NULL; 314 struct nf_conn *ct = NULL;
298 enum ip_conntrack_info uninitialized_var(ctinfo); 315 enum ip_conntrack_info uninitialized_var(ctinfo);
299 bool csum_verify; 316 bool csum_verify;
317 char *secdata = NULL;
318 u32 seclen = 0;
300 319
301 size = nlmsg_total_size(sizeof(struct nfgenmsg)) 320 size = nlmsg_total_size(sizeof(struct nfgenmsg))
302 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) 321 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -352,6 +371,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
352 + nla_total_size(sizeof(u_int32_t))); /* gid */ 371 + nla_total_size(sizeof(u_int32_t))); /* gid */
353 } 372 }
354 373
374 if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
375 seclen = nfqnl_get_sk_secctx(entskb, &secdata);
376 if (seclen)
377 size += nla_total_size(seclen);
378 }
379
355 skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, 380 skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
356 GFP_ATOMIC); 381 GFP_ATOMIC);
357 if (!skb) { 382 if (!skb) {
@@ -479,6 +504,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
479 nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) 504 nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
480 goto nla_put_failure; 505 goto nla_put_failure;
481 506
507 if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
508 goto nla_put_failure;
509
482 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) 510 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
483 goto nla_put_failure; 511 goto nla_put_failure;
484 512
@@ -806,8 +834,6 @@ nfqnl_dev_drop(struct net *net, int ifindex)
806 rcu_read_unlock(); 834 rcu_read_unlock();
807} 835}
808 836
809#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
810
811static int 837static int
812nfqnl_rcv_dev_event(struct notifier_block *this, 838nfqnl_rcv_dev_event(struct notifier_block *this,
813 unsigned long event, void *ptr) 839 unsigned long event, void *ptr)
@@ -824,6 +850,27 @@ static struct notifier_block nfqnl_dev_notifier = {
824 .notifier_call = nfqnl_rcv_dev_event, 850 .notifier_call = nfqnl_rcv_dev_event,
825}; 851};
826 852
853static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
854{
855 return entry->elem == (struct nf_hook_ops *)ops_ptr;
856}
857
858static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
859{
860 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
861 int i;
862
863 rcu_read_lock();
864 for (i = 0; i < INSTANCE_BUCKETS; i++) {
865 struct nfqnl_instance *inst;
866 struct hlist_head *head = &q->instance_table[i];
867
868 hlist_for_each_entry_rcu(inst, head, hlist)
869 nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook);
870 }
871 rcu_read_unlock();
872}
873
827static int 874static int
828nfqnl_rcv_nl_event(struct notifier_block *this, 875nfqnl_rcv_nl_event(struct notifier_block *this,
829 unsigned long event, void *ptr) 876 unsigned long event, void *ptr)
@@ -1031,7 +1078,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
1031}; 1078};
1032 1079
1033static const struct nf_queue_handler nfqh = { 1080static const struct nf_queue_handler nfqh = {
1034 .outfn = &nfqnl_enqueue_packet, 1081 .outfn = &nfqnl_enqueue_packet,
1082 .nf_hook_drop = &nfqnl_nf_hook_drop,
1035}; 1083};
1036 1084
1037static int 1085static int
@@ -1142,7 +1190,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
1142 ret = -EOPNOTSUPP; 1190 ret = -EOPNOTSUPP;
1143 goto err_out_unlock; 1191 goto err_out_unlock;
1144 } 1192 }
1145 1193#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
1194 if (flags & mask & NFQA_CFG_F_SECCTX) {
1195 ret = -EOPNOTSUPP;
1196 goto err_out_unlock;
1197 }
1198#endif
1146 spin_lock_bh(&queue->lock); 1199 spin_lock_bh(&queue->lock);
1147 queue->flags &= ~mask; 1200 queue->flags &= ~mask;
1148 queue->flags |= flags & mask; 1201 queue->flags |= flags & mask;
@@ -1257,7 +1310,7 @@ static int seq_show(struct seq_file *s, void *v)
1257 inst->copy_mode, inst->copy_range, 1310 inst->copy_mode, inst->copy_range,
1258 inst->queue_dropped, inst->queue_user_dropped, 1311 inst->queue_dropped, inst->queue_user_dropped,
1259 inst->id_sequence, 1); 1312 inst->id_sequence, 1);
1260 return seq_has_overflowed(s); 1313 return 0;
1261} 1314}
1262 1315
1263static const struct seq_operations nfqnl_seq_ops = { 1316static const struct seq_operations nfqnl_seq_ops = {
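
The NFQA_SECCTX support is the standard optional-attribute pattern: account for the attribute while sizing the message only when a security context is actually present, then emit it under the same condition. Condensed from the hunks above:

	/* while sizing the message */
	if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
		seclen = nfqnl_get_sk_secctx(entskb, &secdata);
		if (seclen)
			size += nla_total_size(seclen);
	}

	/* while filling it in */
	if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
		goto nla_put_failure;

The config path additionally rejects NFQA_CFG_F_SECCTX outright when the kernel is built without CONFIG_NETWORK_SECMARK, so userspace learns immediately that the flag cannot be honoured.
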
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7f29cfc76349..66def315eb56 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -161,6 +161,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
161 par->hook_mask = 0; 161 par->hook_mask = 0;
162 } 162 }
163 par->family = ctx->afi->family; 163 par->family = ctx->afi->family;
164 par->nft_compat = true;
164} 165}
165 166
166static void target_compat_from_user(struct xt_target *t, void *in, void *out) 167static void target_compat_from_user(struct xt_target *t, void *in, void *out)
@@ -377,6 +378,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
377 par->hook_mask = 0; 378 par->hook_mask = 0;
378 } 379 }
379 par->family = ctx->afi->family; 380 par->family = ctx->afi->family;
381 par->nft_compat = true;
380} 382}
381 383
382static void match_compat_from_user(struct xt_match *m, void *in, void *out) 384static void match_compat_from_user(struct xt_match *m, void *in, void *out)
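
Both compat setup paths now set par->nft_compat, giving xt extensions a way to tell rules loaded through nft from native iptables ones. The xt_TCPMSS hunks below are the consumer: their checkentry scan for a matching --syn ematch walks a list that does not exist for nft-translated rules, so they bail out early instead:

	if (par->nft_compat)
		return 0;	/* no ematch list to inspect under nft */
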
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 51a459c3c649..d324fe71260c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -658,35 +658,23 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
658 658
659struct xt_table_info *xt_alloc_table_info(unsigned int size) 659struct xt_table_info *xt_alloc_table_info(unsigned int size)
660{ 660{
661 struct xt_table_info *newinfo; 661 struct xt_table_info *info = NULL;
662 int cpu; 662 size_t sz = sizeof(*info) + size;
663 663
664 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ 664 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
665 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) 665 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
666 return NULL; 666 return NULL;
667 667
668 newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL); 668 if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
669 if (!newinfo) 669 info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
670 return NULL; 670 if (!info) {
671 671 info = vmalloc(sz);
672 newinfo->size = size; 672 if (!info)
673
674 for_each_possible_cpu(cpu) {
675 if (size <= PAGE_SIZE)
676 newinfo->entries[cpu] = kmalloc_node(size,
677 GFP_KERNEL,
678 cpu_to_node(cpu));
679 else
680 newinfo->entries[cpu] = vmalloc_node(size,
681 cpu_to_node(cpu));
682
683 if (newinfo->entries[cpu] == NULL) {
684 xt_free_table_info(newinfo);
685 return NULL; 673 return NULL;
686 }
687 } 674 }
688 675 memset(info, 0, sizeof(*info));
689 return newinfo; 676 info->size = size;
677 return info;
690} 678}
691EXPORT_SYMBOL(xt_alloc_table_info); 679EXPORT_SYMBOL(xt_alloc_table_info);
692 680
@@ -694,9 +682,6 @@ void xt_free_table_info(struct xt_table_info *info)
694{ 682{
695 int cpu; 683 int cpu;
696 684
697 for_each_possible_cpu(cpu)
698 kvfree(info->entries[cpu]);
699
700 if (info->jumpstack != NULL) { 685 if (info->jumpstack != NULL) {
701 for_each_possible_cpu(cpu) 686 for_each_possible_cpu(cpu)
702 kvfree(info->jumpstack[cpu]); 687 kvfree(info->jumpstack[cpu]);
@@ -705,7 +690,7 @@ void xt_free_table_info(struct xt_table_info *info)
705 690
706 free_percpu(info->stackptr); 691 free_percpu(info->stackptr);
707 692
708 kfree(info); 693 kvfree(info);
709} 694}
710EXPORT_SYMBOL(xt_free_table_info); 695EXPORT_SYMBOL(xt_free_table_info);
711 696
@@ -947,11 +932,9 @@ static int xt_table_seq_show(struct seq_file *seq, void *v)
947{ 932{
948 struct xt_table *table = list_entry(v, struct xt_table, list); 933 struct xt_table *table = list_entry(v, struct xt_table, list);
949 934
950 if (strlen(table->name)) { 935 if (*table->name)
951 seq_printf(seq, "%s\n", table->name); 936 seq_printf(seq, "%s\n", table->name);
952 return seq_has_overflowed(seq); 937 return 0;
953 } else
954 return 0;
955} 938}
956 939
957static const struct seq_operations xt_table_seq_ops = { 940static const struct seq_operations xt_table_seq_ops = {
@@ -1087,10 +1070,8 @@ static int xt_match_seq_show(struct seq_file *seq, void *v)
1087 if (trav->curr == trav->head) 1070 if (trav->curr == trav->head)
1088 return 0; 1071 return 0;
1089 match = list_entry(trav->curr, struct xt_match, list); 1072 match = list_entry(trav->curr, struct xt_match, list);
1090 if (*match->name == '\0') 1073 if (*match->name)
1091 return 0; 1074 seq_printf(seq, "%s\n", match->name);
1092 seq_printf(seq, "%s\n", match->name);
1093 return seq_has_overflowed(seq);
1094 } 1075 }
1095 return 0; 1076 return 0;
1096} 1077}
@@ -1142,10 +1123,8 @@ static int xt_target_seq_show(struct seq_file *seq, void *v)
1142 if (trav->curr == trav->head) 1123 if (trav->curr == trav->head)
1143 return 0; 1124 return 0;
1144 target = list_entry(trav->curr, struct xt_target, list); 1125 target = list_entry(trav->curr, struct xt_target, list);
1145 if (*target->name == '\0') 1126 if (*target->name)
1146 return 0; 1127 seq_printf(seq, "%s\n", target->name);
1147 seq_printf(seq, "%s\n", target->name);
1148 return seq_has_overflowed(seq);
1149 } 1128 }
1150 return 0; 1129 return 0;
1151} 1130}
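
With the per-CPU entry arrays gone, xt_alloc_table_info() shrinks to one flat allocation using the common try-kmalloc-then-vmalloc fallback, and xt_free_table_info() releases it with kvfree(), which copes with either origin. The resulting allocator, as assembled from the hunk above:

	struct xt_table_info *xt_alloc_table_info(unsigned int size)
	{
		struct xt_table_info *info = NULL;
		size_t sz = sizeof(*info) + size;

		/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
		if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
			return NULL;

		/* Modest sizes try the slab first, quietly and without
		 * retries; anything else (or a slab miss) uses vmalloc().
		 */
		if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
			info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN |
					   __GFP_NORETRY);
		if (!info) {
			info = vmalloc(sz);
			if (!info)
				return NULL;
		}
		memset(info, 0, sizeof(*info));
		info->size = size;
		return info;
	}
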
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e762de5ee89b..8c3190e2fc6a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -277,6 +277,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
277 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 277 "FORWARD, OUTPUT and POSTROUTING hooks\n");
278 return -EINVAL; 278 return -EINVAL;
279 } 279 }
280 if (par->nft_compat)
281 return 0;
282
280 xt_ematch_foreach(ematch, e) 283 xt_ematch_foreach(ematch, e)
281 if (find_syn_match(ematch)) 284 if (find_syn_match(ematch))
282 return 0; 285 return 0;
@@ -299,6 +302,9 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
299 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 302 "FORWARD, OUTPUT and POSTROUTING hooks\n");
300 return -EINVAL; 303 return -EINVAL;
301 } 304 }
305 if (par->nft_compat)
306 return 0;
307
302 xt_ematch_foreach(ematch, e) 308 xt_ematch_foreach(ematch, e)
303 if (find_syn_match(ematch)) 309 if (find_syn_match(ematch))
304 return 0; 310 return 0;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 292934d23482..a747eb475b68 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -152,6 +152,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
152 fl6.daddr = info->gw.in6; 152 fl6.daddr = info->gw.in6;
153 fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | 153 fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
154 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; 154 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
155 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
155 dst = ip6_route_output(net, NULL, &fl6); 156 dst = ip6_route_output(net, NULL, &fl6);
156 if (dst->error) { 157 if (dst->error) {
157 dst_release(dst); 158 dst_release(dst);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index fab6eea1bf38..5b4743cc0436 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -73,7 +73,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
73 73
74 if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) 74 if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
75 ret |= XT_ADDRTYPE_LOCAL; 75 ret |= XT_ADDRTYPE_LOCAL;
76 if (rt->rt6i_flags & RTF_ANYCAST) 76 if (ipv6_anycast_destination((struct dst_entry *)rt, addr))
77 ret |= XT_ADDRTYPE_ANYCAST; 77 ret |= XT_ADDRTYPE_ANYCAST;
78 78
79 dst_release(&rt->dst); 79 dst_release(&rt->dst);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 23345238711b..ebd41dc501e5 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -23,6 +23,7 @@ MODULE_ALIAS("ipt_mark");
23MODULE_ALIAS("ip6t_mark"); 23MODULE_ALIAS("ip6t_mark");
24MODULE_ALIAS("ipt_MARK"); 24MODULE_ALIAS("ipt_MARK");
25MODULE_ALIAS("ip6t_MARK"); 25MODULE_ALIAS("ip6t_MARK");
26MODULE_ALIAS("arpt_MARK");
26 27
27static unsigned int 28static unsigned int
28mark_tg(struct sk_buff *skb, const struct xt_action_param *par) 29mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 89045982ec94..5669e5b453f4 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -9,14 +9,16 @@
9 */ 9 */
10 10
11/* Kernel module which implements the set match and SET target 11/* Kernel module which implements the set match and SET target
12 * for netfilter/iptables. */ 12 * for netfilter/iptables.
13 */
13 14
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/skbuff.h> 16#include <linux/skbuff.h>
16 17
17#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter/xt_set.h> 19#include <linux/netfilter/ipset/ip_set.h>
19#include <linux/netfilter/ipset/ip_set_timeout.h> 20#include <linux/netfilter/ipset/ip_set_timeout.h>
21#include <uapi/linux/netfilter/xt_set.h>
20 22
21MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 24MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -52,6 +54,7 @@ static bool
52set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) 54set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
53{ 55{
54 const struct xt_set_info_match_v0 *info = par->matchinfo; 56 const struct xt_set_info_match_v0 *info = par->matchinfo;
57
55 ADT_OPT(opt, par->family, info->match_set.u.compat.dim, 58 ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
56 info->match_set.u.compat.flags, 0, UINT_MAX); 59 info->match_set.u.compat.flags, 0, UINT_MAX);
57 60
@@ -68,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info)
68 info->u.compat.dim = IPSET_DIM_ZERO; 71 info->u.compat.dim = IPSET_DIM_ZERO;
69 if (info->u.flags[0] & IPSET_MATCH_INV) 72 if (info->u.flags[0] & IPSET_MATCH_INV)
70 info->u.compat.flags |= IPSET_INV_MATCH; 73 info->u.compat.flags |= IPSET_INV_MATCH;
71 for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { 74 for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) {
72 info->u.compat.dim++; 75 info->u.compat.dim++;
73 if (info->u.flags[i] & IPSET_SRC) 76 if (info->u.flags[i] & IPSET_SRC)
74 info->u.compat.flags |= (1<<info->u.compat.dim); 77 info->u.compat.flags |= (1 << info->u.compat.dim);
75 } 78 }
76} 79}
77 80
@@ -88,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
88 info->match_set.index); 91 info->match_set.index);
89 return -ENOENT; 92 return -ENOENT;
90 } 93 }
91 if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { 94 if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
92 pr_warn("Protocol error: set match dimension is over the limit!\n"); 95 pr_warn("Protocol error: set match dimension is over the limit!\n");
93 ip_set_nfnl_put(par->net, info->match_set.index); 96 ip_set_nfnl_put(par->net, info->match_set.index);
94 return -ERANGE; 97 return -ERANGE;
@@ -114,6 +117,7 @@ static bool
114set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) 117set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
115{ 118{
116 const struct xt_set_info_match_v1 *info = par->matchinfo; 119 const struct xt_set_info_match_v1 *info = par->matchinfo;
120
117 ADT_OPT(opt, par->family, info->match_set.dim, 121 ADT_OPT(opt, par->family, info->match_set.dim,
118 info->match_set.flags, 0, UINT_MAX); 122 info->match_set.flags, 0, UINT_MAX);
119 123
@@ -178,9 +182,10 @@ static bool
178set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) 182set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
179{ 183{
180 const struct xt_set_info_match_v3 *info = par->matchinfo; 184 const struct xt_set_info_match_v3 *info = par->matchinfo;
185 int ret;
186
181 ADT_OPT(opt, par->family, info->match_set.dim, 187 ADT_OPT(opt, par->family, info->match_set.dim,
182 info->match_set.flags, info->flags, UINT_MAX); 188 info->match_set.flags, info->flags, UINT_MAX);
183 int ret;
184 189
185 if (info->packets.op != IPSET_COUNTER_NONE || 190 if (info->packets.op != IPSET_COUNTER_NONE ||
186 info->bytes.op != IPSET_COUNTER_NONE) 191 info->bytes.op != IPSET_COUNTER_NONE)
@@ -224,9 +229,10 @@ static bool
224set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) 229set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
225{ 230{
226 const struct xt_set_info_match_v4 *info = par->matchinfo; 231 const struct xt_set_info_match_v4 *info = par->matchinfo;
232 int ret;
233
227 ADT_OPT(opt, par->family, info->match_set.dim, 234 ADT_OPT(opt, par->family, info->match_set.dim,
228 info->match_set.flags, info->flags, UINT_MAX); 235 info->match_set.flags, info->flags, UINT_MAX);
229 int ret;
230 236
231 if (info->packets.op != IPSET_COUNTER_NONE || 237 if (info->packets.op != IPSET_COUNTER_NONE ||
232 info->bytes.op != IPSET_COUNTER_NONE) 238 info->bytes.op != IPSET_COUNTER_NONE)
@@ -252,6 +258,7 @@ static unsigned int
252set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) 258set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
253{ 259{
254 const struct xt_set_info_target_v0 *info = par->targinfo; 260 const struct xt_set_info_target_v0 *info = par->targinfo;
261
255 ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, 262 ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
256 info->add_set.u.compat.flags, 0, UINT_MAX); 263 info->add_set.u.compat.flags, 0, UINT_MAX);
257 ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, 264 ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
@@ -290,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
290 return -ENOENT; 297 return -ENOENT;
291 } 298 }
292 } 299 }
293 if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || 300 if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
294 info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { 301 info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
295 pr_warn("Protocol error: SET target dimension is over the limit!\n"); 302 pr_warn("Protocol error: SET target dimension is over the limit!\n");
296 if (info->add_set.index != IPSET_INVALID_ID) 303 if (info->add_set.index != IPSET_INVALID_ID)
297 ip_set_nfnl_put(par->net, info->add_set.index); 304 ip_set_nfnl_put(par->net, info->add_set.index);
@@ -324,6 +331,7 @@ static unsigned int
324set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) 331set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
325{ 332{
326 const struct xt_set_info_target_v1 *info = par->targinfo; 333 const struct xt_set_info_target_v1 *info = par->targinfo;
334
327 ADT_OPT(add_opt, par->family, info->add_set.dim, 335 ADT_OPT(add_opt, par->family, info->add_set.dim,
328 info->add_set.flags, 0, UINT_MAX); 336 info->add_set.flags, 0, UINT_MAX);
329 ADT_OPT(del_opt, par->family, info->del_set.dim, 337 ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -392,6 +400,7 @@ static unsigned int
392set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) 400set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
393{ 401{
394 const struct xt_set_info_target_v2 *info = par->targinfo; 402 const struct xt_set_info_target_v2 *info = par->targinfo;
403
395 ADT_OPT(add_opt, par->family, info->add_set.dim, 404 ADT_OPT(add_opt, par->family, info->add_set.dim,
396 info->add_set.flags, info->flags, info->timeout); 405 info->add_set.flags, info->flags, info->timeout);
397 ADT_OPT(del_opt, par->family, info->del_set.dim, 406 ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -399,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
399 408
400 /* Normalize to fit into jiffies */ 409 /* Normalize to fit into jiffies */
401 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && 410 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
402 add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) 411 add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
403 add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; 412 add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
404 if (info->add_set.index != IPSET_INVALID_ID) 413 if (info->add_set.index != IPSET_INVALID_ID)
405 ip_set_add(info->add_set.index, skb, par, &add_opt); 414 ip_set_add(info->add_set.index, skb, par, &add_opt);
406 if (info->del_set.index != IPSET_INVALID_ID) 415 if (info->del_set.index != IPSET_INVALID_ID)
@@ -418,6 +427,8 @@ static unsigned int
418set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) 427set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
419{ 428{
420 const struct xt_set_info_target_v3 *info = par->targinfo; 429 const struct xt_set_info_target_v3 *info = par->targinfo;
430 int ret;
431
421 ADT_OPT(add_opt, par->family, info->add_set.dim, 432 ADT_OPT(add_opt, par->family, info->add_set.dim,
422 info->add_set.flags, info->flags, info->timeout); 433 info->add_set.flags, info->flags, info->timeout);
423 ADT_OPT(del_opt, par->family, info->del_set.dim, 434 ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -425,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
425 ADT_OPT(map_opt, par->family, info->map_set.dim, 436 ADT_OPT(map_opt, par->family, info->map_set.dim,
426 info->map_set.flags, 0, UINT_MAX); 437 info->map_set.flags, 0, UINT_MAX);
427 438
428 int ret;
429
430 /* Normalize to fit into jiffies */ 439 /* Normalize to fit into jiffies */
431 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && 440 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
432 add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) 441 add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
433 add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; 442 add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
434 if (info->add_set.index != IPSET_INVALID_ID) 443 if (info->add_set.index != IPSET_INVALID_ID)
435 ip_set_add(info->add_set.index, skb, par, &add_opt); 444 ip_set_add(info->add_set.index, skb, par, &add_opt);
436 if (info->del_set.index != IPSET_INVALID_ID) 445 if (info->del_set.index != IPSET_INVALID_ID)
@@ -456,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
456 return XT_CONTINUE; 465 return XT_CONTINUE;
457} 466}
458 467
459
460static int 468static int
461set_target_v3_checkentry(const struct xt_tgchk_param *par) 469set_target_v3_checkentry(const struct xt_tgchk_param *par)
462{ 470{
@@ -496,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
496 !(par->hook_mask & (1 << NF_INET_FORWARD | 504 !(par->hook_mask & (1 << NF_INET_FORWARD |
497 1 << NF_INET_LOCAL_OUT | 505 1 << NF_INET_LOCAL_OUT |
498 1 << NF_INET_POST_ROUTING))) { 506 1 << NF_INET_POST_ROUTING))) {
499 pr_warn("mapping of prio or/and queue is allowed only" 507 pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
500 "from OUTPUT/FORWARD/POSTROUTING chains\n");
501 return -EINVAL; 508 return -EINVAL;
502 } 509 }
503 index = ip_set_nfnl_get_byindex(par->net, 510 index = ip_set_nfnl_get_byindex(par->net,
@@ -518,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
518 if (info->add_set.dim > IPSET_DIM_MAX || 525 if (info->add_set.dim > IPSET_DIM_MAX ||
519 info->del_set.dim > IPSET_DIM_MAX || 526 info->del_set.dim > IPSET_DIM_MAX ||
520 info->map_set.dim > IPSET_DIM_MAX) { 527 info->map_set.dim > IPSET_DIM_MAX) {
521 pr_warn("Protocol error: SET target dimension " 528 pr_warn("Protocol error: SET target dimension is over the limit!\n");
522 "is over the limit!\n");
523 if (info->add_set.index != IPSET_INVALID_ID) 529 if (info->add_set.index != IPSET_INVALID_ID)
524 ip_set_nfnl_put(par->net, info->add_set.index); 530 ip_set_nfnl_put(par->net, info->add_set.index);
525 if (info->del_set.index != IPSET_INVALID_ID) 531 if (info->del_set.index != IPSET_INVALID_ID)
@@ -545,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par)
545 ip_set_nfnl_put(par->net, info->map_set.index); 551 ip_set_nfnl_put(par->net, info->map_set.index);
546} 552}
547 553
548
549static struct xt_match set_matches[] __read_mostly = { 554static struct xt_match set_matches[] __read_mostly = {
550 { 555 {
551 .name = "set", 556 .name = "set",
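Besides the whitespace cleanups, the two pr_warn() changes above un-split their format strings. Kernel style keeps user-visible strings on one line even past the usual column limit, so the messages stay greppable; a minimal sketch of the pattern, using a message from this file:

    /* Split string: grepping for the full message never matches. */
    pr_warn("Protocol error: SET target dimension "
            "is over the limit!\n");

    /* Single string: searchable end to end, preferred despite length. */
    pr_warn("Protocol error: SET target dimension is over the limit!\n");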
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e092cb046326..43e26c881100 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -205,6 +205,7 @@ static bool
205socket_match(const struct sk_buff *skb, struct xt_action_param *par, 205socket_match(const struct sk_buff *skb, struct xt_action_param *par,
206 const struct xt_socket_mtinfo1 *info) 206 const struct xt_socket_mtinfo1 *info)
207{ 207{
208 struct sk_buff *pskb = (struct sk_buff *)skb;
208 struct sock *sk = skb->sk; 209 struct sock *sk = skb->sk;
209 210
210 if (!sk) 211 if (!sk)
@@ -226,6 +227,10 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
226 if (info->flags & XT_SOCKET_TRANSPARENT) 227 if (info->flags & XT_SOCKET_TRANSPARENT)
227 transparent = xt_socket_sk_is_transparent(sk); 228 transparent = xt_socket_sk_is_transparent(sk);
228 229
230 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
231 transparent)
232 pskb->mark = sk->sk_mark;
233
229 if (sk != skb->sk) 234 if (sk != skb->sk)
230 sock_gen_put(sk); 235 sock_gen_put(sk);
231 236
@@ -247,7 +252,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
247} 252}
248 253
249static bool 254static bool
250socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 255socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
251{ 256{
252 return socket_match(skb, par, par->matchinfo); 257 return socket_match(skb, par, par->matchinfo);
253} 258}
@@ -371,9 +376,10 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
371} 376}
372 377
373static bool 378static bool
374socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 379socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
375{ 380{
376 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 381 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
382 struct sk_buff *pskb = (struct sk_buff *)skb;
377 struct sock *sk = skb->sk; 383 struct sock *sk = skb->sk;
378 384
379 if (!sk) 385 if (!sk)
@@ -395,6 +401,10 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
395 if (info->flags & XT_SOCKET_TRANSPARENT) 401 if (info->flags & XT_SOCKET_TRANSPARENT)
396 transparent = xt_socket_sk_is_transparent(sk); 402 transparent = xt_socket_sk_is_transparent(sk);
397 403
404 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
405 transparent)
406 pskb->mark = sk->sk_mark;
407
398 if (sk != skb->sk) 408 if (sk != skb->sk)
399 sock_gen_put(sk); 409 sock_gen_put(sk);
400 410
@@ -428,6 +438,19 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par)
428 return 0; 438 return 0;
429} 439}
430 440
441static int socket_mt_v3_check(const struct xt_mtchk_param *par)
442{
443 const struct xt_socket_mtinfo3 *info =
444 (struct xt_socket_mtinfo3 *)par->matchinfo;
445
446 if (info->flags & ~XT_SOCKET_FLAGS_V3) {
447 pr_info("unknown flags 0x%x\n",
448 info->flags & ~XT_SOCKET_FLAGS_V3);
449 return -EINVAL;
450 }
451 return 0;
452}
453
431static struct xt_match socket_mt_reg[] __read_mostly = { 454static struct xt_match socket_mt_reg[] __read_mostly = {
432 { 455 {
433 .name = "socket", 456 .name = "socket",
@@ -442,7 +465,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
442 .name = "socket", 465 .name = "socket",
443 .revision = 1, 466 .revision = 1,
444 .family = NFPROTO_IPV4, 467 .family = NFPROTO_IPV4,
445 .match = socket_mt4_v1_v2, 468 .match = socket_mt4_v1_v2_v3,
446 .checkentry = socket_mt_v1_check, 469 .checkentry = socket_mt_v1_check,
447 .matchsize = sizeof(struct xt_socket_mtinfo1), 470 .matchsize = sizeof(struct xt_socket_mtinfo1),
448 .hooks = (1 << NF_INET_PRE_ROUTING) | 471 .hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -454,7 +477,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
454 .name = "socket", 477 .name = "socket",
455 .revision = 1, 478 .revision = 1,
456 .family = NFPROTO_IPV6, 479 .family = NFPROTO_IPV6,
457 .match = socket_mt6_v1_v2, 480 .match = socket_mt6_v1_v2_v3,
458 .checkentry = socket_mt_v1_check, 481 .checkentry = socket_mt_v1_check,
459 .matchsize = sizeof(struct xt_socket_mtinfo1), 482 .matchsize = sizeof(struct xt_socket_mtinfo1),
460 .hooks = (1 << NF_INET_PRE_ROUTING) | 483 .hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -466,7 +489,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
466 .name = "socket", 489 .name = "socket",
467 .revision = 2, 490 .revision = 2,
468 .family = NFPROTO_IPV4, 491 .family = NFPROTO_IPV4,
469 .match = socket_mt4_v1_v2, 492 .match = socket_mt4_v1_v2_v3,
470 .checkentry = socket_mt_v2_check, 493 .checkentry = socket_mt_v2_check,
471 .matchsize = sizeof(struct xt_socket_mtinfo1), 494 .matchsize = sizeof(struct xt_socket_mtinfo1),
472 .hooks = (1 << NF_INET_PRE_ROUTING) | 495 .hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -478,7 +501,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
478 .name = "socket", 501 .name = "socket",
479 .revision = 2, 502 .revision = 2,
480 .family = NFPROTO_IPV6, 503 .family = NFPROTO_IPV6,
481 .match = socket_mt6_v1_v2, 504 .match = socket_mt6_v1_v2_v3,
482 .checkentry = socket_mt_v2_check, 505 .checkentry = socket_mt_v2_check,
483 .matchsize = sizeof(struct xt_socket_mtinfo1), 506 .matchsize = sizeof(struct xt_socket_mtinfo1),
484 .hooks = (1 << NF_INET_PRE_ROUTING) | 507 .hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -486,6 +509,30 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
486 .me = THIS_MODULE, 509 .me = THIS_MODULE,
487 }, 510 },
488#endif 511#endif
512 {
513 .name = "socket",
514 .revision = 3,
515 .family = NFPROTO_IPV4,
516 .match = socket_mt4_v1_v2_v3,
517 .checkentry = socket_mt_v3_check,
518 .matchsize = sizeof(struct xt_socket_mtinfo1),
519 .hooks = (1 << NF_INET_PRE_ROUTING) |
520 (1 << NF_INET_LOCAL_IN),
521 .me = THIS_MODULE,
522 },
523#ifdef XT_SOCKET_HAVE_IPV6
524 {
525 .name = "socket",
526 .revision = 3,
527 .family = NFPROTO_IPV6,
528 .match = socket_mt6_v1_v2_v3,
529 .checkentry = socket_mt_v3_check,
530 .matchsize = sizeof(struct xt_socket_mtinfo1),
531 .hooks = (1 << NF_INET_PRE_ROUTING) |
532 (1 << NF_INET_LOCAL_IN),
533 .me = THIS_MODULE,
534 },
535#endif
489}; 536};
490 537
491static int __init socket_mt_init(void) 538static int __init socket_mt_init(void)
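Revision 3 reuses the v1/v2 match bodies (hence the _v3 renames) and only adds the mark write-back shown in the hunks above. That write-back is why both match functions now take a non-const alias of the skb; a sketch of just that fragment, with names as in the code above:

    /* The skb is const in match context, but restoring the socket mark
     * has to modify it, hence the cast-away alias. */
    struct sk_buff *pskb = (struct sk_buff *)skb;

    if ((info->flags & XT_SOCKET_RESTORESKMARK) && !wildcard && transparent)
            pskb->mark = sk->sk_mark;  /* later rules see the owner's mark */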
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bf6e76643f78..9a0ae7172f92 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -76,17 +76,18 @@ struct listeners {
76}; 76};
77 77
78/* state bits */ 78/* state bits */
79#define NETLINK_CONGESTED 0x0 79#define NETLINK_S_CONGESTED 0x0
80 80
81/* flags */ 81/* flags */
82#define NETLINK_KERNEL_SOCKET 0x1 82#define NETLINK_F_KERNEL_SOCKET 0x1
83#define NETLINK_RECV_PKTINFO 0x2 83#define NETLINK_F_RECV_PKTINFO 0x2
84#define NETLINK_BROADCAST_SEND_ERROR 0x4 84#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
85#define NETLINK_RECV_NO_ENOBUFS 0x8 85#define NETLINK_F_RECV_NO_ENOBUFS 0x8
86#define NETLINK_F_LISTEN_ALL_NSID 0x10
86 87
87static inline int netlink_is_kernel(struct sock *sk) 88static inline int netlink_is_kernel(struct sock *sk)
88{ 89{
89 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; 90 return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
90} 91}
91 92
92struct netlink_table *nl_table __read_mostly; 93struct netlink_table *nl_table __read_mostly;
@@ -157,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
157out: 158out:
158 spin_unlock(&netlink_tap_lock); 159 spin_unlock(&netlink_tap_lock);
159 160
160 if (found && nt->module) 161 if (found)
161 module_put(nt->module); 162 module_put(nt->module);
162 163
163 return found ? 0 : -ENODEV; 164 return found ? 0 : -ENODEV;
@@ -256,8 +257,9 @@ static void netlink_overrun(struct sock *sk)
256{ 257{
257 struct netlink_sock *nlk = nlk_sk(sk); 258 struct netlink_sock *nlk = nlk_sk(sk);
258 259
259 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { 260 if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
260 if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) { 261 if (!test_and_set_bit(NETLINK_S_CONGESTED,
262 &nlk_sk(sk)->state)) {
261 sk->sk_err = ENOBUFS; 263 sk->sk_err = ENOBUFS;
262 sk->sk_error_report(sk); 264 sk->sk_error_report(sk);
263 } 265 }
@@ -270,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk)
270 struct netlink_sock *nlk = nlk_sk(sk); 272 struct netlink_sock *nlk = nlk_sk(sk);
271 273
272 if (skb_queue_empty(&sk->sk_receive_queue)) 274 if (skb_queue_empty(&sk->sk_receive_queue))
273 clear_bit(NETLINK_CONGESTED, &nlk->state); 275 clear_bit(NETLINK_S_CONGESTED, &nlk->state);
274 if (!test_bit(NETLINK_CONGESTED, &nlk->state)) 276 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
275 wake_up_interruptible(&nlk->wait); 277 wake_up_interruptible(&nlk->wait);
276} 278}
277 279
@@ -1118,14 +1120,15 @@ static struct proto netlink_proto = {
1118}; 1120};
1119 1121
1120static int __netlink_create(struct net *net, struct socket *sock, 1122static int __netlink_create(struct net *net, struct socket *sock,
1121 struct mutex *cb_mutex, int protocol) 1123 struct mutex *cb_mutex, int protocol,
1124 int kern)
1122{ 1125{
1123 struct sock *sk; 1126 struct sock *sk;
1124 struct netlink_sock *nlk; 1127 struct netlink_sock *nlk;
1125 1128
1126 sock->ops = &netlink_ops; 1129 sock->ops = &netlink_ops;
1127 1130
1128 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); 1131 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
1129 if (!sk) 1132 if (!sk)
1130 return -ENOMEM; 1133 return -ENOMEM;
1131 1134
@@ -1187,7 +1190,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
1187 if (err < 0) 1190 if (err < 0)
1188 goto out; 1191 goto out;
1189 1192
1190 err = __netlink_create(net, sock, cb_mutex, protocol); 1193 err = __netlink_create(net, sock, cb_mutex, protocol, kern);
1191 if (err < 0) 1194 if (err < 0)
1192 goto out_module; 1195 goto out_module;
1193 1196
@@ -1297,20 +1300,24 @@ static int netlink_autobind(struct socket *sock)
1297 struct netlink_table *table = &nl_table[sk->sk_protocol]; 1300 struct netlink_table *table = &nl_table[sk->sk_protocol];
1298 s32 portid = task_tgid_vnr(current); 1301 s32 portid = task_tgid_vnr(current);
1299 int err; 1302 int err;
1300 static s32 rover = -4097; 1303 s32 rover = -4096;
1304 bool ok;
1301 1305
1302retry: 1306retry:
1303 cond_resched(); 1307 cond_resched();
1304 rcu_read_lock(); 1308 rcu_read_lock();
1305 if (__netlink_lookup(table, portid, net)) { 1309 ok = !__netlink_lookup(table, portid, net);
1310 rcu_read_unlock();
1311 if (!ok) {
1306 /* Bind collision, search negative portid values. */ 1312 /* Bind collision, search negative portid values. */
1307 portid = rover--; 1313 if (rover == -4096)
1308 if (rover > -4097) 1314 /* rover will be in range [S32_MIN, -4097] */
1315 rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
1316 else if (rover >= -4096)
1309 rover = -4097; 1317 rover = -4097;
1310 rcu_read_unlock(); 1318 portid = rover--;
1311 goto retry; 1319 goto retry;
1312 } 1320 }
1313 rcu_read_unlock();
1314 1321
1315 err = netlink_insert(sk, portid); 1322 err = netlink_insert(sk, portid);
1316 if (err == -EADDRINUSE) 1323 if (err == -EADDRINUSE)
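A stand-alone model of the reworked collision search, with the kernel helpers swapped for stdlib equivalents (illustrative only; as the hunk shows, the real code now keeps the rover on the stack per call instead of in a static):

    #include <limits.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Toy model: -4096 marks "unseeded"; the first collision seeds the
     * rover uniformly in [INT32_MIN, -4097], later collisions walk
     * downward and rewrap to -4097 if the rover leaves the range. */
    static int32_t next_autobind_portid(int32_t *rover)
    {
            if (*rover == -4096)
                    *rover = INT32_MIN +
                             (int32_t)(rand() % (-4096LL - INT32_MIN));
            else if (*rover >= -4096)
                    *rover = -4097;
            return (*rover)--;
    }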
@@ -1657,7 +1664,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1657 nlk = nlk_sk(sk); 1664 nlk = nlk_sk(sk);
1658 1665
1659 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1666 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1660 test_bit(NETLINK_CONGESTED, &nlk->state)) && 1667 test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
1661 !netlink_skb_is_mmaped(skb)) { 1668 !netlink_skb_is_mmaped(skb)) {
1662 DECLARE_WAITQUEUE(wait, current); 1669 DECLARE_WAITQUEUE(wait, current);
1663 if (!*timeo) { 1670 if (!*timeo) {
@@ -1672,7 +1679,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1672 add_wait_queue(&nlk->wait, &wait); 1679 add_wait_queue(&nlk->wait, &wait);
1673 1680
1674 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1681 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1675 test_bit(NETLINK_CONGESTED, &nlk->state)) && 1682 test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
1676 !sock_flag(sk, SOCK_DEAD)) 1683 !sock_flag(sk, SOCK_DEAD))
1677 *timeo = schedule_timeout(*timeo); 1684 *timeo = schedule_timeout(*timeo);
1678 1685
@@ -1896,7 +1903,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1896 struct netlink_sock *nlk = nlk_sk(sk); 1903 struct netlink_sock *nlk = nlk_sk(sk);
1897 1904
1898 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1905 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1899 !test_bit(NETLINK_CONGESTED, &nlk->state)) { 1906 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
1900 netlink_skb_set_owner_r(skb, sk); 1907 netlink_skb_set_owner_r(skb, sk);
1901 __netlink_sendskb(sk, skb); 1908 __netlink_sendskb(sk, skb);
1902 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1909 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
@@ -1932,8 +1939,17 @@ static void do_one_broadcast(struct sock *sk,
1932 !test_bit(p->group - 1, nlk->groups)) 1939 !test_bit(p->group - 1, nlk->groups))
1933 return; 1940 return;
1934 1941
1935 if (!net_eq(sock_net(sk), p->net)) 1942 if (!net_eq(sock_net(sk), p->net)) {
1936 return; 1943 if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
1944 return;
1945
1946 if (!peernet_has_id(sock_net(sk), p->net))
1947 return;
1948
1949 if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
1950 CAP_NET_BROADCAST))
1951 return;
1952 }
1937 1953
1938 if (p->failure) { 1954 if (p->failure) {
1939 netlink_overrun(sk); 1955 netlink_overrun(sk);
@@ -1957,23 +1973,33 @@ static void do_one_broadcast(struct sock *sk,
1957 netlink_overrun(sk); 1973 netlink_overrun(sk);
1958 /* Clone failed. Notify ALL listeners. */ 1974 /* Clone failed. Notify ALL listeners. */
1959 p->failure = 1; 1975 p->failure = 1;
1960 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1976 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
1961 p->delivery_failure = 1; 1977 p->delivery_failure = 1;
1962 } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { 1978 goto out;
1979 }
1980 if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1963 kfree_skb(p->skb2); 1981 kfree_skb(p->skb2);
1964 p->skb2 = NULL; 1982 p->skb2 = NULL;
1965 } else if (sk_filter(sk, p->skb2)) { 1983 goto out;
1984 }
1985 if (sk_filter(sk, p->skb2)) {
1966 kfree_skb(p->skb2); 1986 kfree_skb(p->skb2);
1967 p->skb2 = NULL; 1987 p->skb2 = NULL;
1968 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1988 goto out;
1989 }
1990 NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
1991 NETLINK_CB(p->skb2).nsid_is_set = true;
1992 val = netlink_broadcast_deliver(sk, p->skb2);
1993 if (val < 0) {
1969 netlink_overrun(sk); 1994 netlink_overrun(sk);
1970 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1995 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
1971 p->delivery_failure = 1; 1996 p->delivery_failure = 1;
1972 } else { 1997 } else {
1973 p->congested |= val; 1998 p->congested |= val;
1974 p->delivered = 1; 1999 p->delivered = 1;
1975 p->skb2 = NULL; 2000 p->skb2 = NULL;
1976 } 2001 }
2002out:
1977 sock_put(sk); 2003 sock_put(sk);
1978} 2004}
1979 2005
@@ -2058,7 +2084,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
2058 !test_bit(p->group - 1, nlk->groups)) 2084 !test_bit(p->group - 1, nlk->groups))
2059 goto out; 2085 goto out;
2060 2086
2061 if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { 2087 if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
2062 ret = 1; 2088 ret = 1;
2063 goto out; 2089 goto out;
2064 } 2090 }
@@ -2077,7 +2103,7 @@ out:
2077 * @code: error code, must be negative (as usual in kernelspace) 2103 * @code: error code, must be negative (as usual in kernelspace)
2078 * 2104 *
2079 * This function returns the number of broadcast listeners that have set the 2105 * This function returns the number of broadcast listeners that have set the
2080 * NETLINK_RECV_NO_ENOBUFS socket option. 2106 * NETLINK_NO_ENOBUFS socket option.
2081 */ 2107 */
2082int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 2108int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
2083{ 2109{
@@ -2137,9 +2163,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
2137 switch (optname) { 2163 switch (optname) {
2138 case NETLINK_PKTINFO: 2164 case NETLINK_PKTINFO:
2139 if (val) 2165 if (val)
2140 nlk->flags |= NETLINK_RECV_PKTINFO; 2166 nlk->flags |= NETLINK_F_RECV_PKTINFO;
2141 else 2167 else
2142 nlk->flags &= ~NETLINK_RECV_PKTINFO; 2168 nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
2143 err = 0; 2169 err = 0;
2144 break; 2170 break;
2145 case NETLINK_ADD_MEMBERSHIP: 2171 case NETLINK_ADD_MEMBERSHIP:
@@ -2168,18 +2194,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
2168 } 2194 }
2169 case NETLINK_BROADCAST_ERROR: 2195 case NETLINK_BROADCAST_ERROR:
2170 if (val) 2196 if (val)
2171 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; 2197 nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
2172 else 2198 else
2173 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; 2199 nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
2174 err = 0; 2200 err = 0;
2175 break; 2201 break;
2176 case NETLINK_NO_ENOBUFS: 2202 case NETLINK_NO_ENOBUFS:
2177 if (val) { 2203 if (val) {
2178 nlk->flags |= NETLINK_RECV_NO_ENOBUFS; 2204 nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
2179 clear_bit(NETLINK_CONGESTED, &nlk->state); 2205 clear_bit(NETLINK_S_CONGESTED, &nlk->state);
2180 wake_up_interruptible(&nlk->wait); 2206 wake_up_interruptible(&nlk->wait);
2181 } else { 2207 } else {
2182 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; 2208 nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
2183 } 2209 }
2184 err = 0; 2210 err = 0;
2185 break; 2211 break;
@@ -2202,6 +2228,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
2202 break; 2228 break;
2203 } 2229 }
2204#endif /* CONFIG_NETLINK_MMAP */ 2230#endif /* CONFIG_NETLINK_MMAP */
2231 case NETLINK_LISTEN_ALL_NSID:
2232 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
2233 return -EPERM;
2234
2235 if (val)
2236 nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
2237 else
2238 nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
2239 err = 0;
2240 break;
2205 default: 2241 default:
2206 err = -ENOPROTOOPT; 2242 err = -ENOPROTOOPT;
2207 } 2243 }
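From user space the new option is a plain boolean setsockopt; a minimal sketch (SOL_NETLINK and the option value are mirrored locally since older uapi headers predate them — values as of this series):

    #include <sys/socket.h>
    #include <linux/netlink.h>

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270
    #endif
    #ifndef NETLINK_LISTEN_ALL_NSID
    #define NETLINK_LISTEN_ALL_NSID 8
    #endif

    /* Opt into broadcasts from peer network namespaces; requires
     * CAP_NET_BROADCAST over the socket's user namespace, matching
     * the ns_capable() check above. */
    static int listen_all_nsid(int fd)
    {
            int one = 1;

            return setsockopt(fd, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID,
                              &one, sizeof(one));
    }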
@@ -2228,7 +2264,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2228 if (len < sizeof(int)) 2264 if (len < sizeof(int))
2229 return -EINVAL; 2265 return -EINVAL;
2230 len = sizeof(int); 2266 len = sizeof(int);
2231 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; 2267 val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
2232 if (put_user(len, optlen) || 2268 if (put_user(len, optlen) ||
2233 put_user(val, optval)) 2269 put_user(val, optval))
2234 return -EFAULT; 2270 return -EFAULT;
@@ -2238,7 +2274,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2238 if (len < sizeof(int)) 2274 if (len < sizeof(int))
2239 return -EINVAL; 2275 return -EINVAL;
2240 len = sizeof(int); 2276 len = sizeof(int);
2241 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; 2277 val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
2242 if (put_user(len, optlen) || 2278 if (put_user(len, optlen) ||
2243 put_user(val, optval)) 2279 put_user(val, optval))
2244 return -EFAULT; 2280 return -EFAULT;
@@ -2248,12 +2284,34 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2248 if (len < sizeof(int)) 2284 if (len < sizeof(int))
2249 return -EINVAL; 2285 return -EINVAL;
2250 len = sizeof(int); 2286 len = sizeof(int);
2251 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; 2287 val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
2252 if (put_user(len, optlen) || 2288 if (put_user(len, optlen) ||
2253 put_user(val, optval)) 2289 put_user(val, optval))
2254 return -EFAULT; 2290 return -EFAULT;
2255 err = 0; 2291 err = 0;
2256 break; 2292 break;
2293 case NETLINK_LIST_MEMBERSHIPS: {
2294 int pos, idx, shift;
2295
2296 err = 0;
2297 netlink_table_grab();
2298 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
2299 if (len - pos < sizeof(u32))
2300 break;
2301
2302 idx = pos / sizeof(unsigned long);
2303 shift = (pos % sizeof(unsigned long)) * 8;
2304 if (put_user((u32)(nlk->groups[idx] >> shift),
2305 (u32 __user *)(optval + pos))) {
2306 err = -EFAULT;
2307 break;
2308 }
2309 }
2310 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
2311 err = -EFAULT;
2312 netlink_table_ungrab();
2313 break;
2314 }
2257 default: 2315 default:
2258 err = -ENOPROTOOPT; 2316 err = -ENOPROTOOPT;
2259 } 2317 }
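The new getsockopt() fills as many 32-bit words of the bitmap as the caller's buffer holds and always reports the full required length, so a short first buffer can be used to size a second call; a sketch (reusing the SOL_NETLINK fallback define from the previous sketch, with NETLINK_LIST_MEMBERSHIPS being 9 in the uapi header from this series):

    #include <stdio.h>

    /* Query which multicast groups the socket has joined. */
    static int dump_memberships(int fd)
    {
            unsigned int groups[8] = { 0 };     /* room for 256 groups */
            socklen_t len = sizeof(groups);
            unsigned int i;

            if (getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS,
                           groups, &len) < 0)
                    return -1;
            for (i = 0; i < len / sizeof(groups[0]) && i < 8; i++)
                    printf("groups[%u] = 0x%08x\n", i, groups[i]);
            return 0;
    }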
@@ -2268,6 +2326,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
2268 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 2326 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
2269} 2327}
2270 2328
2329static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
2330 struct sk_buff *skb)
2331{
2332 if (!NETLINK_CB(skb).nsid_is_set)
2333 return;
2334
2335 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
2336 &NETLINK_CB(skb).nsid);
2337}
2338
2271static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 2339static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2272{ 2340{
2273 struct sock *sk = sock->sk; 2341 struct sock *sk = sock->sk;
@@ -2419,8 +2487,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
2419 msg->msg_namelen = sizeof(*addr); 2487 msg->msg_namelen = sizeof(*addr);
2420 } 2488 }
2421 2489
2422 if (nlk->flags & NETLINK_RECV_PKTINFO) 2490 if (nlk->flags & NETLINK_F_RECV_PKTINFO)
2423 netlink_cmsg_recv_pktinfo(msg, skb); 2491 netlink_cmsg_recv_pktinfo(msg, skb);
2492 if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
2493 netlink_cmsg_listen_all_nsid(sk, msg, skb);
2424 2494
2425 memset(&scm, 0, sizeof(scm)); 2495 memset(&scm, 0, sizeof(scm));
2426 scm.creds = *NETLINK_CREDS(skb); 2496 scm.creds = *NETLINK_CREDS(skb);
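On the receive side, the sender's namespace id rides along as ancillary data; a sketch of extracting it after recvmsg() (CMSG macros from <sys/socket.h>, defines as in the earlier sketches):

    /* Returns the origin netns id relative to this socket's netns,
     * or -1 when no id was attached to the message. */
    static int broadcast_nsid(struct msghdr *msg)
    {
            struct cmsghdr *cmsg;

            for (cmsg = CMSG_FIRSTHDR(msg); cmsg;
                 cmsg = CMSG_NXTHDR(msg, cmsg)) {
                    if (cmsg->cmsg_level == SOL_NETLINK &&
                        cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID)
                            return *(int *)CMSG_DATA(cmsg);
            }
            return -1;
    }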
@@ -2474,17 +2544,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
2474 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 2544 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
2475 return NULL; 2545 return NULL;
2476 2546
2477 /* 2547 if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
2478 * We have to just have a reference on the net from sk, but don't
2479 * get_net it. Besides, we cannot get and then put the net here.
2480 * So we create one inside init_net and the move it to net.
2481 */
2482
2483 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
2484 goto out_sock_release_nosk; 2548 goto out_sock_release_nosk;
2485 2549
2486 sk = sock->sk; 2550 sk = sock->sk;
2487 sk_change_net(sk, net);
2488 2551
2489 if (!cfg || cfg->groups < 32) 2552 if (!cfg || cfg->groups < 32)
2490 groups = 32; 2553 groups = 32;
@@ -2503,7 +2566,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
2503 goto out_sock_release; 2566 goto out_sock_release;
2504 2567
2505 nlk = nlk_sk(sk); 2568 nlk = nlk_sk(sk);
2506 nlk->flags |= NETLINK_KERNEL_SOCKET; 2569 nlk->flags |= NETLINK_F_KERNEL_SOCKET;
2507 2570
2508 netlink_table_grab(); 2571 netlink_table_grab();
2509 if (!nl_table[unit].registered) { 2572 if (!nl_table[unit].registered) {
@@ -2540,7 +2603,10 @@ EXPORT_SYMBOL(__netlink_kernel_create);
2540void 2603void
2541netlink_kernel_release(struct sock *sk) 2604netlink_kernel_release(struct sock *sk)
2542{ 2605{
2543 sk_release_kernel(sk); 2606 if (sk == NULL || sk->sk_socket == NULL)
2607 return;
2608
2609 sock_release(sk->sk_socket);
2544} 2610}
2545EXPORT_SYMBOL(netlink_kernel_release); 2611EXPORT_SYMBOL(netlink_kernel_release);
2546 2612
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b987fd56c3c5..ed212ffc1d9d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
433 if (sock->type != SOCK_SEQPACKET || protocol != 0) 433 if (sock->type != SOCK_SEQPACKET || protocol != 0)
434 return -ESOCKTNOSUPPORT; 434 return -ESOCKTNOSUPPORT;
435 435
436 sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto); 436 sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern);
437 if (sk == NULL) 437 if (sk == NULL)
438 return -ENOMEM; 438 return -ENOMEM;
439 439
@@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk)
476 if (osk->sk_type != SOCK_SEQPACKET) 476 if (osk->sk_type != SOCK_SEQPACKET)
477 return NULL; 477 return NULL;
478 478
479 sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot); 479 sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0);
480 if (sk == NULL) 480 if (sk == NULL)
481 return NULL; 481 return NULL;
482 482
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 96b64d2f6dbf..d72a4f1558f2 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -31,7 +31,6 @@
31#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/interrupt.h> 32#include <linux/interrupt.h>
33#include <linux/notifier.h> 33#include <linux/notifier.h>
34#include <linux/netfilter.h>
35#include <linux/init.h> 34#include <linux/init.h>
36#include <linux/spinlock.h> 35#include <linux/spinlock.h>
37#include <net/netrom.h> 36#include <net/netrom.h>
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 2277276f52bc..54e40fa47822 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto,
40 40
41 read_lock(&proto_tab_lock); 41 read_lock(&proto_tab_lock);
42 if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) { 42 if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) {
43 rc = proto_tab[proto]->create(net, sock, proto_tab[proto]); 43 rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern);
44 module_put(proto_tab[proto]->owner); 44 module_put(proto_tab[proto]->owner);
45 } 45 }
46 read_unlock(&proto_tab_lock); 46 read_unlock(&proto_tab_lock);
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index de1789e3cc82..1f68724d44d3 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
225 struct sk_buff *skb, u8 direction); 225 struct sk_buff *skb, u8 direction);
226 226
227/* Sock API */ 227/* Sock API */
228struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); 228struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern);
229void nfc_llcp_sock_free(struct nfc_llcp_sock *sock); 229void nfc_llcp_sock_free(struct nfc_llcp_sock *sock);
230void nfc_llcp_accept_unlink(struct sock *sk); 230void nfc_llcp_accept_unlink(struct sock *sk);
231void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk); 231void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index b18f07ccb504..98876274a1ee 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
934 sock->ssap = ssap; 934 sock->ssap = ssap;
935 } 935 }
936 936
937 new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC); 937 new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0);
938 if (new_sk == NULL) { 938 if (new_sk == NULL) {
939 reason = LLCP_DM_REJ; 939 reason = LLCP_DM_REJ;
940 release_sock(&sock->sk); 940 release_sock(&sock->sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 9578bd6a4f3e..b7de0da46acd 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk)
942 } 942 }
943} 943}
944 944
945struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) 945struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern)
946{ 946{
947 struct sock *sk; 947 struct sock *sk;
948 struct nfc_llcp_sock *llcp_sock; 948 struct nfc_llcp_sock *llcp_sock;
949 949
950 sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto); 950 sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern);
951 if (!sk) 951 if (!sk)
952 return NULL; 952 return NULL;
953 953
@@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock)
993} 993}
994 994
995static int llcp_sock_create(struct net *net, struct socket *sock, 995static int llcp_sock_create(struct net *net, struct socket *sock,
996 const struct nfc_protocol *nfc_proto) 996 const struct nfc_protocol *nfc_proto, int kern)
997{ 997{
998 struct sock *sk; 998 struct sock *sk;
999 999
@@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
1009 else 1009 else
1010 sock->ops = &llcp_sock_ops; 1010 sock->ops = &llcp_sock_ops;
1011 1011
1012 sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); 1012 sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
1013 if (sk == NULL) 1013 if (sk == NULL)
1014 return -ENOMEM; 1014 return -ENOMEM;
1015 1015
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index a4f1e42e3481..901c1ddba841 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -19,3 +19,10 @@ config NFC_NCI_SPI
19 an NFC Controller (NFCC) and a Device Host (DH). 19 an NFC Controller (NFCC) and a Device Host (DH).
20 20
21 Say yes if you use an NCI driver that requires SPI link layer. 21 Say yes if you use an NCI driver that requires SPI link layer.
22
23config NFC_NCI_UART
24 depends on NFC_NCI && TTY
25 tristate "NCI over UART protocol support"
26 default n
27 help
28 Say yes if you use an NCI driver that requires UART link layer.
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 7ed8949266cc..b4b85b82e988 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o
7nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o 7nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o
8 8
9nci-$(CONFIG_NFC_NCI_SPI) += spi.o 9nci-$(CONFIG_NFC_NCI_SPI) += spi.o
10
11nci_uart-y += uart.o
12obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 49ff32106080..95af2d24d5be 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -28,6 +28,7 @@
28#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ 28#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
29 29
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/kernel.h>
31#include <linux/types.h> 32#include <linux/types.h>
32#include <linux/workqueue.h> 33#include <linux/workqueue.h>
33#include <linux/completion.h> 34#include <linux/completion.h>
@@ -73,6 +74,7 @@ void nci_req_complete(struct nci_dev *ndev, int result)
73 complete(&ndev->req_completion); 74 complete(&ndev->req_completion);
74 } 75 }
75} 76}
77EXPORT_SYMBOL(nci_req_complete);
76 78
77static void nci_req_cancel(struct nci_dev *ndev, int err) 79static void nci_req_cancel(struct nci_dev *ndev, int err)
78{ 80{
@@ -323,6 +325,32 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
323 sizeof(struct nci_rf_deactivate_cmd), &cmd); 325 sizeof(struct nci_rf_deactivate_cmd), &cmd);
324} 326}
325 327
328struct nci_prop_cmd_param {
329 __u16 opcode;
330 size_t len;
331 __u8 *payload;
332};
333
334static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt)
335{
336 struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt;
337
338 nci_send_cmd(ndev, param->opcode, param->len, param->payload);
339}
340
341int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
342{
343 struct nci_prop_cmd_param param;
344
345 param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid);
346 param.len = len;
347 param.payload = payload;
348
349 return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)&param,
350 msecs_to_jiffies(NCI_CMD_TIMEOUT));
351}
352EXPORT_SYMBOL(nci_prop_cmd);
353
326static int nci_open_device(struct nci_dev *ndev) 354static int nci_open_device(struct nci_dev *ndev)
327{ 355{
328 int rc = 0; 356 int rc = 0;
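Drivers reach the new helper through nci_prop_cmd(); a hypothetical call site (the OID and payload byte are invented for illustration):

    /* Send a vendor-specific command and wait for its response, using
     * the request machinery wired up in nci_prop_cmd() above. */
    static int acme_set_rf_power(struct nci_dev *ndev, __u8 level)
    {
            return nci_prop_cmd(ndev, 0x0f /* hypothetical vendor OID */,
                                sizeof(level), &level);
    }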
@@ -343,11 +371,17 @@ static int nci_open_device(struct nci_dev *ndev)
343 371
344 set_bit(NCI_INIT, &ndev->flags); 372 set_bit(NCI_INIT, &ndev->flags);
345 373
346 rc = __nci_request(ndev, nci_reset_req, 0, 374 if (ndev->ops->init)
347 msecs_to_jiffies(NCI_RESET_TIMEOUT)); 375 rc = ndev->ops->init(ndev);
348 376
349 if (ndev->ops->setup) 377 if (!rc) {
350 ndev->ops->setup(ndev); 378 rc = __nci_request(ndev, nci_reset_req, 0,
379 msecs_to_jiffies(NCI_RESET_TIMEOUT));
380 }
381
382 if (!rc && ndev->ops->setup) {
383 rc = ndev->ops->setup(ndev);
384 }
351 385
352 if (!rc) { 386 if (!rc) {
353 rc = __nci_request(ndev, nci_init_req, 0, 387 rc = __nci_request(ndev, nci_init_req, 0,
@@ -407,6 +441,12 @@ static int nci_close_device(struct nci_dev *ndev)
407 set_bit(NCI_INIT, &ndev->flags); 441 set_bit(NCI_INIT, &ndev->flags);
408 __nci_request(ndev, nci_reset_req, 0, 442 __nci_request(ndev, nci_reset_req, 0,
409 msecs_to_jiffies(NCI_RESET_TIMEOUT)); 443 msecs_to_jiffies(NCI_RESET_TIMEOUT));
444
445 /* After this point our queues are empty
446 * and no works are scheduled.
447 */
448 ndev->ops->close(ndev);
449
410 clear_bit(NCI_INIT, &ndev->flags); 450 clear_bit(NCI_INIT, &ndev->flags);
411 451
412 del_timer_sync(&ndev->cmd_timer); 452 del_timer_sync(&ndev->cmd_timer);
@@ -414,10 +454,6 @@ static int nci_close_device(struct nci_dev *ndev)
414 /* Flush cmd wq */ 454 /* Flush cmd wq */
415 flush_workqueue(ndev->cmd_wq); 455 flush_workqueue(ndev->cmd_wq);
416 456
417 /* After this point our queues are empty
418 * and no works are scheduled. */
419 ndev->ops->close(ndev);
420
421 /* Clear flags */ 457 /* Clear flags */
422 ndev->flags = 0; 458 ndev->flags = 0;
423 459
@@ -762,7 +798,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
762 798
763 if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { 799 if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
764 nci_request(ndev, nci_rf_deactivate_req, 800 nci_request(ndev, nci_rf_deactivate_req,
765 NCI_DEACTIVATE_TYPE_SLEEP_MODE, 801 NCI_DEACTIVATE_TYPE_IDLE_MODE,
766 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); 802 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
767 } 803 }
768} 804}
@@ -961,6 +997,14 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
961 return NULL; 997 return NULL;
962 998
963 ndev->ops = ops; 999 ndev->ops = ops;
1000
1001 if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) {
1002 pr_err("Too many proprietary commands: %zd\n",
1003 ops->n_prop_ops);
1004 ops->prop_ops = NULL;
1005 ops->n_prop_ops = 0;
1006 }
1007
964 ndev->tx_headroom = tx_headroom; 1008 ndev->tx_headroom = tx_headroom;
965 ndev->tx_tailroom = tx_tailroom; 1009 ndev->tx_tailroom = tx_tailroom;
966 init_completion(&ndev->req_completion); 1010 init_completion(&ndev->req_completion);
@@ -1165,6 +1209,49 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
1165 return 0; 1209 return 0;
1166} 1210}
1167 1211
1212/* Proprietary commands API */
1213static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev,
1214 __u16 opcode)
1215{
1216 size_t i;
1217 struct nci_prop_ops *prop_op;
1218
1219 if (!ndev->ops->prop_ops || !ndev->ops->n_prop_ops)
1220 return NULL;
1221
1222 for (i = 0; i < ndev->ops->n_prop_ops; i++) {
1223 prop_op = &ndev->ops->prop_ops[i];
1224 if (prop_op->opcode == opcode)
1225 return prop_op;
1226 }
1227
1228 return NULL;
1229}
1230
1231int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
1232 struct sk_buff *skb)
1233{
1234 struct nci_prop_ops *prop_op;
1235
1236 prop_op = prop_cmd_lookup(ndev, rsp_opcode);
1237 if (!prop_op || !prop_op->rsp)
1238 return -ENOTSUPP;
1239
1240 return prop_op->rsp(ndev, skb);
1241}
1242
1243int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
1244 struct sk_buff *skb)
1245{
1246 struct nci_prop_ops *prop_op;
1247
1248 prop_op = prop_cmd_lookup(ndev, ntf_opcode);
1249 if (!prop_op || !prop_op->ntf)
1250 return -ENOTSUPP;
1251
1252 return prop_op->ntf(ndev, skb);
1253}
1254
1168/* ---- NCI TX Data worker thread ---- */ 1255/* ---- NCI TX Data worker thread ---- */
1169 1256
1170static void nci_tx_work(struct work_struct *work) 1257static void nci_tx_work(struct work_struct *work)
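The lookup above walks a per-driver table; a sketch of what a driver would register (all acme_* names hypothetical, fields as consumed by prop_cmd_lookup()):

    static int acme_rf_power_rsp(struct nci_dev *ndev, struct sk_buff *skb)
    {
            /* first payload byte is assumed to be a status code */
            return (skb->len && !skb->data[0]) ? 0 : -EIO;
    }

    static struct nci_prop_ops acme_prop_ops[] = {
            {
                    .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, 0x0f),
                    .rsp    = acme_rf_power_rsp,
                    /* .ntf would handle spontaneous notifications */
            },
    };

    /* hooked up from the driver's struct nci_ops:
     *      .prop_ops   = acme_prop_ops,
     *      .n_prop_ops = ARRAY_SIZE(acme_prop_ops),
     */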
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index ed54ec533836..af002df640c7 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -639,22 +639,19 @@ int nci_hci_dev_session_init(struct nci_dev *ndev)
639 ndev->hci_dev->init_data.gates[0].gate, 639 ndev->hci_dev->init_data.gates[0].gate,
640 ndev->hci_dev->init_data.gates[0].pipe); 640 ndev->hci_dev->init_data.gates[0].pipe);
641 if (r < 0) 641 if (r < 0)
642 goto exit; 642 return r;
643 643
644 r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, 644 r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE,
645 NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); 645 NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb);
646 if (r < 0) 646 if (r < 0)
647 goto exit; 647 return r;
648 648
649 if (skb->len && 649 if (skb->len &&
650 skb->len == strlen(ndev->hci_dev->init_data.session_id) && 650 skb->len == strlen(ndev->hci_dev->init_data.session_id) &&
651 memcmp(ndev->hci_dev->init_data.session_id, 651 !memcmp(ndev->hci_dev->init_data.session_id, skb->data, skb->len) &&
652 skb->data, skb->len) == 0 &&
653 ndev->ops->hci_load_session) { 652 ndev->ops->hci_load_session) {
654 /* Restore gate<->pipe table from some proprietary location. */ 653 /* Restore gate<->pipe table from some proprietary location. */
655 r = ndev->ops->hci_load_session(ndev); 654 r = ndev->ops->hci_load_session(ndev);
656 if (r < 0)
657 goto exit;
658 } else { 655 } else {
659 r = nci_hci_dev_connect_gates(ndev, 656 r = nci_hci_dev_connect_gates(ndev,
660 ndev->hci_dev->init_data.gate_count, 657 ndev->hci_dev->init_data.gate_count,
@@ -667,8 +664,6 @@ int nci_hci_dev_session_init(struct nci_dev *ndev)
667 ndev->hci_dev->init_data.session_id, 664 ndev->hci_dev->init_data.session_id,
668 strlen(ndev->hci_dev->init_data.session_id)); 665 strlen(ndev->hci_dev->init_data.session_id));
669 } 666 }
670 if (r == 0)
671 goto exit;
672 667
673exit: 668exit:
674 kfree_skb(skb); 669 kfree_skb(skb);
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 3218071072ac..5d1c2e391c56 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -758,6 +758,15 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
758 /* strip the nci control header */ 758 /* strip the nci control header */
759 skb_pull(skb, NCI_CTRL_HDR_SIZE); 759 skb_pull(skb, NCI_CTRL_HDR_SIZE);
760 760
761 if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) {
762 if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) {
763 pr_err("unsupported ntf opcode 0x%x\n",
764 ntf_opcode);
765 }
766
767 goto end;
768 }
769
761 switch (ntf_opcode) { 770 switch (ntf_opcode) {
762 case NCI_OP_CORE_CONN_CREDITS_NTF: 771 case NCI_OP_CORE_CONN_CREDITS_NTF:
763 nci_core_conn_credits_ntf_packet(ndev, skb); 772 nci_core_conn_credits_ntf_packet(ndev, skb);
@@ -796,5 +805,6 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
796 break; 805 break;
797 } 806 }
798 807
808end:
799 kfree_skb(skb); 809 kfree_skb(skb);
800} 810}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 02486bc2ceea..408bd8f857ab 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -296,6 +296,15 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
296 /* strip the nci control header */ 296 /* strip the nci control header */
297 skb_pull(skb, NCI_CTRL_HDR_SIZE); 297 skb_pull(skb, NCI_CTRL_HDR_SIZE);
298 298
299 if (nci_opcode_gid(rsp_opcode) == NCI_GID_PROPRIETARY) {
300 if (nci_prop_rsp_packet(ndev, rsp_opcode, skb) == -ENOTSUPP) {
301 pr_err("unsupported rsp opcode 0x%x\n",
302 rsp_opcode);
303 }
304
305 goto end;
306 }
307
299 switch (rsp_opcode) { 308 switch (rsp_opcode) {
300 case NCI_OP_CORE_RESET_RSP: 309 case NCI_OP_CORE_RESET_RSP:
301 nci_core_reset_rsp_packet(ndev, skb); 310 nci_core_reset_rsp_packet(ndev, skb);
@@ -346,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
346 break; 355 break;
347 } 356 }
348 357
358end:
349 kfree_skb(skb); 359 kfree_skb(skb);
350 360
351 /* trigger the next cmd */ 361 /* trigger the next cmd */
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
new file mode 100644
index 000000000000..21d8875673a4
--- /dev/null
+++ b/net/nfc/nci/uart.c
@@ -0,0 +1,494 @@
1/*
2 * Copyright (C) 2015, Marvell International Ltd.
3 *
4 * This software file (the "File") is distributed by Marvell International
5 * Ltd. under the terms of the GNU General Public License Version 2, June 1991
6 * (the "License"). You may use, redistribute and/or modify this File in
7 * accordance with the terms and conditions of the License, a copy of which
8 * is available on the worldwide web at
9 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
10 *
11 * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
12 * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 * ARE EXPRESSLY DISCLAIMED. The License provides additional details about
14 * this warranty disclaimer.
15 */
16
17/* Inspired (hugely) by HCI LDISC implementation in Bluetooth.
18 *
19 * Copyright (C) 2000-2001 Qualcomm Incorporated
20 * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com>
21 * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org>
22 */
23
24#include <linux/module.h>
25
26#include <linux/kernel.h>
27#include <linux/init.h>
28#include <linux/types.h>
29#include <linux/fcntl.h>
30#include <linux/interrupt.h>
31#include <linux/ptrace.h>
32#include <linux/poll.h>
33
34#include <linux/slab.h>
35#include <linux/tty.h>
36#include <linux/errno.h>
37#include <linux/string.h>
38#include <linux/signal.h>
39#include <linux/ioctl.h>
40#include <linux/skbuff.h>
41
42#include <net/nfc/nci.h>
43#include <net/nfc/nci_core.h>
44
45/* TX states */
46#define NCI_UART_SENDING 1
47#define NCI_UART_TX_WAKEUP 2
48
49static struct nci_uart *nci_uart_drivers[NCI_UART_DRIVER_MAX];
50
51static inline struct sk_buff *nci_uart_dequeue(struct nci_uart *nu)
52{
53 struct sk_buff *skb = nu->tx_skb;
54
55 if (!skb)
56 skb = skb_dequeue(&nu->tx_q);
57 else
58 nu->tx_skb = NULL;
59
60 return skb;
61}
62
63static inline int nci_uart_queue_empty(struct nci_uart *nu)
64{
65 if (nu->tx_skb)
66 return 0;
67
68 return skb_queue_empty(&nu->tx_q);
69}
70
71static int nci_uart_tx_wakeup(struct nci_uart *nu)
72{
73 if (test_and_set_bit(NCI_UART_SENDING, &nu->tx_state)) {
74 set_bit(NCI_UART_TX_WAKEUP, &nu->tx_state);
75 return 0;
76 }
77
78 schedule_work(&nu->write_work);
79
80 return 0;
81}
82
83static void nci_uart_write_work(struct work_struct *work)
84{
85 struct nci_uart *nu = container_of(work, struct nci_uart, write_work);
86 struct tty_struct *tty = nu->tty;
87 struct sk_buff *skb;
88
89restart:
90 clear_bit(NCI_UART_TX_WAKEUP, &nu->tx_state);
91
92 if (nu->ops.tx_start)
93 nu->ops.tx_start(nu);
94
95 while ((skb = nci_uart_dequeue(nu))) {
96 int len;
97
98 set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
99 len = tty->ops->write(tty, skb->data, skb->len);
100 skb_pull(skb, len);
101 if (skb->len) {
102 nu->tx_skb = skb;
103 break;
104 }
105 kfree_skb(skb);
106 }
107
108 if (test_bit(NCI_UART_TX_WAKEUP, &nu->tx_state))
109 goto restart;
110
111 if (nu->ops.tx_done && nci_uart_queue_empty(nu))
112 nu->ops.tx_done(nu);
113
114 clear_bit(NCI_UART_SENDING, &nu->tx_state);
115}
116
117static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver)
118{
119 struct nci_uart *nu = NULL;
120 int ret;
121
122 if (driver >= NCI_UART_DRIVER_MAX)
123 return -EINVAL;
124
125 if (!nci_uart_drivers[driver])
126 return -ENOENT;
127
128 nu = kzalloc(sizeof(*nu), GFP_KERNEL);
129 if (!nu)
130 return -ENOMEM;
131
132 memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart));
133 nu->tty = tty;
134 tty->disc_data = nu;
135 skb_queue_head_init(&nu->tx_q);
136 INIT_WORK(&nu->write_work, nci_uart_write_work);
137 spin_lock_init(&nu->rx_lock);
138
139 ret = nu->ops.open(nu);
140 if (ret) {
141 tty->disc_data = NULL;
142 kfree(nu);
143 } else if (!try_module_get(nu->owner)) {
144 nu->ops.close(nu);
145 tty->disc_data = NULL;
146 kfree(nu);
147 return -ENOENT;
148 }
149 return ret;
150}
151
152/* ------ LDISC part ------ */
153
154/* nci_uart_tty_open
155 *
156 * Called when line discipline changed to NCI_UART.
157 *
158 * Arguments:
159 * tty pointer to tty info structure
160 * Return Value:
161 * 0 on success, otherwise error code

162 */
163static int nci_uart_tty_open(struct tty_struct *tty)
164{
165 /* Fail if the tty has no write op, instead of leaving an
166 * exploitable hole
167 */
168 if (!tty->ops->write)
169 return -EOPNOTSUPP;
170
171 tty->disc_data = NULL;
172 tty->receive_room = 65536;
173
174 /* Flush any pending characters in the driver and line discipline. */
175
176 /* FIXME: why is this needed? Note: don't use ldisc_ref here, as the
177 * open path runs before the ldisc is referencable.
178 */
179
180 if (tty->ldisc->ops->flush_buffer)
181 tty->ldisc->ops->flush_buffer(tty);
182 tty_driver_flush_buffer(tty);
183
184 return 0;
185}
186
187/* nci_uart_tty_close()
188 *
189 * Called when the line discipline is changed to something
190 * else, the tty is closed, or the tty detects a hangup.
191 */
192static void nci_uart_tty_close(struct tty_struct *tty)
193{
194 struct nci_uart *nu = (void *)tty->disc_data;
195
196 /* Detach from the tty */
197 tty->disc_data = NULL;
198
199 if (!nu)
200 return;
201
202 if (nu->tx_skb)
203 kfree_skb(nu->tx_skb);
204 if (nu->rx_skb)
205 kfree_skb(nu->rx_skb);
206
207 skb_queue_purge(&nu->tx_q);
208
209 nu->ops.close(nu);
210 nu->tty = NULL;
211 module_put(nu->owner);
212
213 cancel_work_sync(&nu->write_work);
214
215 kfree(nu);
216}
217
218/* nci_uart_tty_wakeup()
219 *
220 * Callback for transmit wakeup. Called when the low-level
221 * device driver can accept more send data.
222 *
223 * Arguments: tty pointer to associated tty instance data
224 * Return Value: None
225 */
226static void nci_uart_tty_wakeup(struct tty_struct *tty)
227{
228 struct nci_uart *nu = (void *)tty->disc_data;
229
230 if (!nu)
231 return;
232
233 clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
234
235 if (tty != nu->tty)
236 return;
237
238 nci_uart_tx_wakeup(nu);
239}
240
241/* nci_uart_tty_receive()
242 *
243 * Called by the tty low-level driver when receive data is
244 * available.
245 *
246 * Arguments: tty pointer to tty instance data
247 * data pointer to received data
248 * flags pointer to flags for data
249 * count count of received data in bytes
250 *
251 * Return Value: None
252 */
253static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
254 char *flags, int count)
255{
256 struct nci_uart *nu = (void *)tty->disc_data;
257
258 if (!nu || tty != nu->tty)
259 return;
260
261 spin_lock(&nu->rx_lock);
262 nu->ops.recv_buf(nu, (void *)data, flags, count);
263 spin_unlock(&nu->rx_lock);
264
265 tty_unthrottle(tty);
266}
267
268/* nci_uart_tty_ioctl()
269 *
270 * Process IOCTL system call for the tty device.
271 *
272 * Arguments:
273 *
274 * tty pointer to tty instance data
275 * file pointer to open file object for device
276 * cmd IOCTL command code
277 * arg argument for IOCTL call (cmd dependent)
278 *
279 * Return Value: Command dependent
280 */
281static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
282 unsigned int cmd, unsigned long arg)
283{
284 struct nci_uart *nu = (void *)tty->disc_data;
285 int err = 0;
286
287 switch (cmd) {
288 case NCIUARTSETDRIVER:
289 if (!nu)
290 return nci_uart_set_driver(tty, (unsigned int)arg);
291 else
292 return -EBUSY;
293 break;
294 default:
295 err = n_tty_ioctl_helper(tty, file, cmd, arg);
296 break;
297 }
298
299 return err;
300}
301
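User space activates all of this by flipping the tty to the new line discipline and then selecting a registered driver slot via the ioctl handled above; a sketch (N_NCI comes from the uapi tty header added with this series, NCIUARTSETDRIVER lives in a kernel header here so user space would mirror the define; the device path and slot index are hypothetical):

    #include <fcntl.h>
    #include <sys/ioctl.h>

    int fd = open("/dev/ttyS1", O_RDWR | O_NOCTTY);
    int ldisc = N_NCI;

    ioctl(fd, TIOCSETD, &ldisc);        /* attach nci_uart_ldisc      */
    ioctl(fd, NCIUARTSETDRIVER, 0UL);   /* arg is passed by value and
                                         * indexes nci_uart_drivers[] */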
302/* We don't provide read/write/poll interface for user space. */
303static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file,
304 unsigned char __user *buf, size_t nr)
305{
306 return 0;
307}
308
309static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
310 const unsigned char *data, size_t count)
311{
312 return 0;
313}
314
315static unsigned int nci_uart_tty_poll(struct tty_struct *tty,
316 struct file *filp, poll_table *wait)
317{
318 return 0;
319}
320
321static int nci_uart_send(struct nci_uart *nu, struct sk_buff *skb)
322{
323 /* Queue TX packet */
324 skb_queue_tail(&nu->tx_q, skb);
325
326 /* Try to start TX (if possible) */
327 nci_uart_tx_wakeup(nu);
328
329 return 0;
330}
331
332/* -- Default recv_buf handler --
333 *
334 * This handler assumes that NCI frames are sent over the UART link without
335 * any framing. It reads the NCI header, retrieves the packet size and, once
336 * all bytes are received, passes the packet to the nci_uart driver.
337 */
338static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
339 char *flags, int count)
340{
341 int chunk_len;
342
343 if (!nu->ndev) {
344 nfc_err(nu->tty->dev,
345 "receive data from tty but no NCI dev is attached yet, drop buffer\n");
346 return 0;
347 }
348
349 /* Decode all incoming data into packets
350 * and enqueue them for processing.
351 */
352 while (count > 0) {
353 /* If this is the first data of a packet, allocate a buffer */
354 if (!nu->rx_skb) {
355 nu->rx_packet_len = -1;
356 nu->rx_skb = nci_skb_alloc(nu->ndev,
357 NCI_MAX_PACKET_SIZE,
358 GFP_KERNEL);
359 if (!nu->rx_skb)
360 return -ENOMEM;
361 }
362
363 /* Eat byte after byte until the full packet header is received */
364 if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) {
365 *skb_put(nu->rx_skb, 1) = *data++;
366 --count;
367 continue;
368 }
369
370 /* Header was received but packet len was not read */
371 if (nu->rx_packet_len < 0)
372 nu->rx_packet_len = NCI_CTRL_HDR_SIZE +
373 nci_plen(nu->rx_skb->data);
374
375 /* Compute how many bytes are missing and how many bytes can
376 * be consumed.
377 */
378 chunk_len = nu->rx_packet_len - nu->rx_skb->len;
379 if (count < chunk_len)
380 chunk_len = count;
381 memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len);
382 data += chunk_len;
383 count -= chunk_len;
384
385 /* Check if the packet is fully received */
386 if (nu->rx_packet_len == nu->rx_skb->len) {
387 /* Pass RX packet to driver */
388 if (nu->ops.recv(nu, nu->rx_skb) != 0)
389 nfc_err(nu->tty->dev, "corrupted RX packet\n");
390 /* Next packet will be a new one */
391 nu->rx_skb = NULL;
392 }
393 }
394
395 return 0;
396}
397
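The loop above is the whole default framing: eat bytes one at a time until NCI_CTRL_HDR_SIZE (3) header bytes have arrived, derive the total packet length from the header's payload-length byte (what nci_plen() reads), then copy whole chunks until that many bytes are buffered. A standalone sketch of the same state machine, with hypothetical names and a fake two-chunk arrival, in case the control flow is easier to follow without the skb plumbing:

#include <stdio.h>
#include <string.h>

#define HDR_SIZE 3                      /* NCI_CTRL_HDR_SIZE */

static unsigned char pkt[HDR_SIZE + 255]; /* header + max 8-bit payload */
static int pkt_len;                     /* bytes accumulated so far */
static int want = -1;                   /* full packet size, -1 = unknown */

static void feed(const unsigned char *data, int count)
{
        while (count > 0) {
                if (pkt_len < HDR_SIZE) {       /* eat header byte by byte */
                        pkt[pkt_len++] = *data++;
                        --count;
                        continue;
                }
                if (want < 0)                   /* header complete: read plen */
                        want = HDR_SIZE + pkt[2];

                int chunk = want - pkt_len;     /* bytes still missing */

                if (chunk > count)
                        chunk = count;
                memcpy(pkt + pkt_len, data, chunk);
                pkt_len += chunk;
                data += chunk;
                count -= chunk;

                if (pkt_len == want) {          /* full packet: hand it off */
                        printf("got packet of %d bytes\n", pkt_len);
                        pkt_len = 0;
                        want = -1;
                }
        }
}

int main(void)
{
        const unsigned char frame[] = { 0x20, 0x00, 0x02, 0xAA, 0xBB };

        feed(frame, 2);                 /* arrives split across two reads */
        feed(frame + 2, 3);
        return 0;
}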
398/* -- Default recv handler -- */
399static int nci_uart_default_recv(struct nci_uart *nu, struct sk_buff *skb)
400{
401 return nci_recv_frame(nu->ndev, skb);
402}
403
404int nci_uart_register(struct nci_uart *nu)
405{
406 if (!nu || !nu->ops.open ||
407 !nu->ops.recv || !nu->ops.close)
408 return -EINVAL;
409
410 /* Set the send callback */
411 nu->ops.send = nci_uart_send;
412
413 /* Install default handlers if not overridden */
414 if (!nu->ops.recv_buf)
415 nu->ops.recv_buf = nci_uart_default_recv_buf;
416 if (!nu->ops.recv)
417 nu->ops.recv = nci_uart_default_recv;
418
419 /* Add this driver to the driver list */
420 if (nci_uart_drivers[nu->driver]) {
421 pr_err("driver %d is already registered\n", nu->driver);
422 return -EBUSY;
423 }
424 nci_uart_drivers[nu->driver] = nu;
425
426 pr_info("NCI uart driver '%s [%d]' registered\n", nu->name, nu->driver);
427
428 return 0;
429}
430EXPORT_SYMBOL_GPL(nci_uart_register);
431
432void nci_uart_unregister(struct nci_uart *nu)
433{
434 pr_info("NCI uart driver '%s [%d]' unregistered\n", nu->name,
435 nu->driver);
436
437 /* Remove this driver from the driver list */
438 nci_uart_drivers[nu->driver] = NULL;
439}
440EXPORT_SYMBOL_GPL(nci_uart_unregister);
441
442void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl)
443{
444 struct ktermios new_termios;
445
446 if (!nu->tty)
447 return;
448
449 down_read(&nu->tty->termios_rwsem);
450 new_termios = nu->tty->termios;
451 up_read(&nu->tty->termios_rwsem);
452 tty_termios_encode_baud_rate(&new_termios, baudrate, baudrate);
453
454 if (flow_ctrl)
455 new_termios.c_cflag |= CRTSCTS;
456 else
457 new_termios.c_cflag &= ~CRTSCTS;
458
459 tty_set_termios(nu->tty, &new_termios);
460}
461EXPORT_SYMBOL_GPL(nci_uart_set_config);
462
463static struct tty_ldisc_ops nci_uart_ldisc = {
464 .magic = TTY_LDISC_MAGIC,
465 .owner = THIS_MODULE,
466 .name = "n_nci",
467 .open = nci_uart_tty_open,
468 .close = nci_uart_tty_close,
469 .read = nci_uart_tty_read,
470 .write = nci_uart_tty_write,
471 .poll = nci_uart_tty_poll,
472 .receive_buf = nci_uart_tty_receive,
473 .write_wakeup = nci_uart_tty_wakeup,
474 .ioctl = nci_uart_tty_ioctl,
475};
476
477static int __init nci_uart_init(void)
478{
479 memset(nci_uart_drivers, 0, sizeof(nci_uart_drivers));
480 return tty_register_ldisc(N_NCI, &nci_uart_ldisc);
481}
482
483static void __exit nci_uart_exit(void)
484{
485 tty_unregister_ldisc(N_NCI);
486}
487
488module_init(nci_uart_init);
489module_exit(nci_uart_exit);
490
491MODULE_AUTHOR("Marvell International Ltd.");
492MODULE_DESCRIPTION("NFC NCI UART driver");
493MODULE_LICENSE("GPL");
494MODULE_ALIAS_LDISC(N_NCI);
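For orientation, this is how a hardware driver is expected to sit on top of the code above: fill in struct nci_uart, register it, and let the default recv_buf/recv handlers do the framing. Everything named my_* below is hypothetical, and the sketch assumes the NCI_UART_DRIVER_* slot constants from this patch's header; treat it as a shape, not a drop-in driver:

#include <linux/module.h>
#include <net/nfc/nci_core.h>

static int my_nci_open(struct nci_uart *nu)     /* hypothetical */
{
        /* Typically: nci_allocate_device() + nci_register_device(),
         * then store the result in nu->ndev.
         */
        return 0;
}

static void my_nci_close(struct nci_uart *nu)   /* hypothetical */
{
        /* Unregister and free the NCI device created in open(). */
}

static int my_nci_recv(struct nci_uart *nu, struct sk_buff *skb)
{
        /* recv_buf was left NULL, so nci_uart_default_recv_buf() has
         * already reassembled a complete NCI packet into skb.
         */
        return nci_recv_frame(nu->ndev, skb);
}

static struct nci_uart my_nci_uart = {
        .owner  = THIS_MODULE,
        .name   = "my_nci_uart",                /* hypothetical */
        .driver = NCI_UART_DRIVER_MARVELL,
        .ops    = {
                .open   = my_nci_open,
                .close  = my_nci_close,
                .recv   = my_nci_recv,
        },
};

static int __init my_nci_init(void)
{
        return nci_uart_register(&my_nci_uart);
}
module_init(my_nci_init);

static void __exit my_nci_exit(void)
{
        nci_uart_unregister(&my_nci_uart);
}
module_exit(my_nci_exit);

MODULE_LICENSE("GPL");

User space then attaches N_NCI to the serial device (TIOCSETD, or ldattach) and selects the driver slot with the NCIUARTSETDRIVER ioctl handled in nci_uart_tty_ioctl() above.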
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 3763036710ae..f85f37ed19b2 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -5,6 +5,12 @@
5 * Lauro Ramos Venancio <lauro.venancio@openbossa.org> 5 * Lauro Ramos Venancio <lauro.venancio@openbossa.org>
6 * Aloisio Almeida Jr <aloisio.almeida@openbossa.org> 6 * Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
7 * 7 *
8 * Vendor commands implementation based on net/wireless/nl80211.c
9 * which is:
10 *
11 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
12 * Copyright 2013-2014 Intel Mobile Communications GmbH
13 *
8 * This program is free software; you can redistribute it and/or modify 14 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 15 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or 16 * the Free Software Foundation; either version 2 of the License, or
@@ -1489,6 +1495,50 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
1489 return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); 1495 return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
1490} 1496}
1491 1497
1498static int nfc_genl_vendor_cmd(struct sk_buff *skb,
1499 struct genl_info *info)
1500{
1501 struct nfc_dev *dev;
1502 struct nfc_vendor_cmd *cmd;
1503 u32 dev_idx, vid, subcmd;
1504 u8 *data;
1505 size_t data_len;
1506 int i;
1507
1508 if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
1509 !info->attrs[NFC_ATTR_VENDOR_ID] ||
1510 !info->attrs[NFC_ATTR_VENDOR_SUBCMD])
1511 return -EINVAL;
1512
1513 dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
1514 vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]);
1515 subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
1516
1517 dev = nfc_get_device(dev_idx);
1518 if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
1519 return -ENODEV;
1520
1521 data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
1522 if (data) {
1523 data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
1524 if (data_len == 0)
1525 return -EINVAL;
1526 } else {
1527 data_len = 0;
1528 }
1529
1530 for (i = 0; i < dev->n_vendor_cmds; i++) {
1531 cmd = &dev->vendor_cmds[i];
1532
1533 if (cmd->vendor_id != vid || cmd->subcmd != subcmd)
1534 continue;
1535
1536 return cmd->doit(dev, data, data_len);
1537 }
1538
1539 return -EOPNOTSUPP;
1540}
1541
1492static const struct genl_ops nfc_genl_ops[] = { 1542static const struct genl_ops nfc_genl_ops[] = {
1493 { 1543 {
1494 .cmd = NFC_CMD_GET_DEVICE, 1544 .cmd = NFC_CMD_GET_DEVICE,
@@ -1579,6 +1629,11 @@ static const struct genl_ops nfc_genl_ops[] = {
1579 .doit = nfc_genl_activate_target, 1629 .doit = nfc_genl_activate_target,
1580 .policy = nfc_genl_policy, 1630 .policy = nfc_genl_policy,
1581 }, 1631 },
1632 {
1633 .cmd = NFC_CMD_VENDOR,
1634 .doit = nfc_genl_vendor_cmd,
1635 .policy = nfc_genl_policy,
1636 },
1582}; 1637};
1583 1638
1584 1639
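The lookup loop in nfc_genl_vendor_cmd() just walks dev->vendor_cmds comparing (vendor_id, subcmd) pairs, so a driver exposes vendor commands by publishing such a table. A minimal sketch, where the 0x1234 vendor ID, the subcommand value and all my_* names are invented for illustration:

#include <net/nfc/nfc.h>

#define MY_VENDOR_ID    0x1234                  /* hypothetical */
#define MY_SUBCMD_FOO   0x01                    /* hypothetical */

static int my_foo_doit(struct nfc_dev *dev, void *data, size_t data_len)
{
        /* data/data_len are exactly what arrived in NFC_ATTR_VENDOR_DATA
         * (data_len is 0 when the attribute was absent).
         */
        return 0;
}

static const struct nfc_vendor_cmd my_vendor_cmds[] = {
        {
                .vendor_id      = MY_VENDOR_ID,
                .subcmd         = MY_SUBCMD_FOO,
                .doit           = my_foo_doit,
        },
};

/* In the driver's setup path:
 *      dev->vendor_cmds   = my_vendor_cmds;
 *      dev->n_vendor_cmds = ARRAY_SIZE(my_vendor_cmds);
 */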
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index a8ce80b47720..5c93e8412a26 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -30,7 +30,7 @@ struct nfc_protocol {
30 struct proto *proto; 30 struct proto *proto;
31 struct module *owner; 31 struct module *owner;
32 int (*create)(struct net *net, struct socket *sock, 32 int (*create)(struct net *net, struct socket *sock,
33 const struct nfc_protocol *nfc_proto); 33 const struct nfc_protocol *nfc_proto, int kern);
34}; 34};
35 35
36struct nfc_rawsock { 36struct nfc_rawsock {
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 82b4e8024778..e9a91488fe3d 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk)
334} 334}
335 335
336static int rawsock_create(struct net *net, struct socket *sock, 336static int rawsock_create(struct net *net, struct socket *sock,
337 const struct nfc_protocol *nfc_proto) 337 const struct nfc_protocol *nfc_proto, int kern)
338{ 338{
339 struct sock *sk; 339 struct sock *sk;
340 340
@@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
348 else 348 else
349 sock->ops = &rawsock_ops; 349 sock->ops = &rawsock_ops;
350 350
351 sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); 351 sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern);
352 if (!sk) 352 if (!sk)
353 return -ENOMEM; 353 return -ENOMEM;
354 354
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ed6b0f8dd1bb..15840401a2ce 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -59,7 +59,7 @@ config OPENVSWITCH_VXLAN
59config OPENVSWITCH_GENEVE 59config OPENVSWITCH_GENEVE
60 tristate "Open vSwitch Geneve tunneling support" 60 tristate "Open vSwitch Geneve tunneling support"
61 depends on OPENVSWITCH 61 depends on OPENVSWITCH
62 depends on GENEVE 62 depends on GENEVE_CORE
63 default OPENVSWITCH 63 default OPENVSWITCH
64 ---help--- 64 ---help---
65 If you say Y here, then the Open vSwitch will be able to create geneve vports. 65 If you say Y here, then the Open vSwitch will be able to create geneve vports.

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b491c1c296fe..8a8c0b8b4f63 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -608,17 +608,16 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
608} 608}
609 609
610static int output_userspace(struct datapath *dp, struct sk_buff *skb, 610static int output_userspace(struct datapath *dp, struct sk_buff *skb,
611 struct sw_flow_key *key, const struct nlattr *attr) 611 struct sw_flow_key *key, const struct nlattr *attr,
612 const struct nlattr *actions, int actions_len)
612{ 613{
613 struct ovs_tunnel_info info; 614 struct ovs_tunnel_info info;
614 struct dp_upcall_info upcall; 615 struct dp_upcall_info upcall;
615 const struct nlattr *a; 616 const struct nlattr *a;
616 int rem; 617 int rem;
617 618
619 memset(&upcall, 0, sizeof(upcall));
618 upcall.cmd = OVS_PACKET_CMD_ACTION; 620 upcall.cmd = OVS_PACKET_CMD_ACTION;
619 upcall.userdata = NULL;
620 upcall.portid = 0;
621 upcall.egress_tun_info = NULL;
622 621
623 for (a = nla_data(attr), rem = nla_len(attr); rem > 0; 622 for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
624 a = nla_next(a, &rem)) { 623 a = nla_next(a, &rem)) {
@@ -647,6 +646,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
647 break; 646 break;
648 } 647 }
649 648
649 case OVS_USERSPACE_ATTR_ACTIONS: {
650 /* Include actions. */
651 upcall.actions = actions;
652 upcall.actions_len = actions_len;
653 break;
654 }
655
650 } /* End of switch. */ 656 } /* End of switch. */
651 } 657 }
652 658
@@ -654,7 +660,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
654} 660}
655 661
656static int sample(struct datapath *dp, struct sk_buff *skb, 662static int sample(struct datapath *dp, struct sk_buff *skb,
657 struct sw_flow_key *key, const struct nlattr *attr) 663 struct sw_flow_key *key, const struct nlattr *attr,
664 const struct nlattr *actions, int actions_len)
658{ 665{
659 const struct nlattr *acts_list = NULL; 666 const struct nlattr *acts_list = NULL;
660 const struct nlattr *a; 667 const struct nlattr *a;
@@ -688,7 +695,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
688 */ 695 */
689 if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && 696 if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
690 nla_is_last(a, rem))) 697 nla_is_last(a, rem)))
691 return output_userspace(dp, skb, key, a); 698 return output_userspace(dp, skb, key, a, actions, actions_len);
692 699
693 skb = skb_clone(skb, GFP_ATOMIC); 700 skb = skb_clone(skb, GFP_ATOMIC);
694 if (!skb) 701 if (!skb)
@@ -872,7 +879,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
872 break; 879 break;
873 880
874 case OVS_ACTION_ATTR_USERSPACE: 881 case OVS_ACTION_ATTR_USERSPACE:
875 output_userspace(dp, skb, key, a); 882 output_userspace(dp, skb, key, a, attr, len);
876 break; 883 break;
877 884
878 case OVS_ACTION_ATTR_HASH: 885 case OVS_ACTION_ATTR_HASH:
@@ -916,7 +923,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
916 break; 923 break;
917 924
918 case OVS_ACTION_ATTR_SAMPLE: 925 case OVS_ACTION_ATTR_SAMPLE:
919 err = sample(dp, skb, key, a); 926 err = sample(dp, skb, key, a, attr, len);
920 break; 927 break;
921 } 928 }
922 929
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 096c6276e6b9..ff8c4a4c1609 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -272,10 +272,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
272 struct dp_upcall_info upcall; 272 struct dp_upcall_info upcall;
273 int error; 273 int error;
274 274
275 memset(&upcall, 0, sizeof(upcall));
275 upcall.cmd = OVS_PACKET_CMD_MISS; 276 upcall.cmd = OVS_PACKET_CMD_MISS;
276 upcall.userdata = NULL;
277 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 277 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
278 upcall.egress_tun_info = NULL;
279 error = ovs_dp_upcall(dp, skb, key, &upcall); 278 error = ovs_dp_upcall(dp, skb, key, &upcall);
280 if (unlikely(error)) 279 if (unlikely(error))
281 kfree_skb(skb); 280 kfree_skb(skb);
@@ -397,6 +396,10 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
397 if (upcall_info->egress_tun_info) 396 if (upcall_info->egress_tun_info)
398 size += nla_total_size(ovs_tun_key_attr_size()); 397 size += nla_total_size(ovs_tun_key_attr_size());
399 398
399 /* OVS_PACKET_ATTR_ACTIONS */
400 if (upcall_info->actions_len)
401 size += nla_total_size(upcall_info->actions_len);
402
400 return size; 403 return size;
401} 404}
402 405
@@ -478,6 +481,17 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
478 nla_nest_end(user_skb, nla); 481 nla_nest_end(user_skb, nla);
479 } 482 }
480 483
484 if (upcall_info->actions_len) {
485 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
486 err = ovs_nla_put_actions(upcall_info->actions,
487 upcall_info->actions_len,
488 user_skb);
489 if (!err)
490 nla_nest_end(user_skb, nla);
491 else
492 nla_nest_cancel(user_skb, nla);
493 }
494
481 /* Only reserve room for attribute header, packet data is added 495 /* Only reserve room for attribute header, packet data is added
482 * in skb_zerocopy() */ 496 * in skb_zerocopy() */
483 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 497 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -545,7 +559,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
545 /* Normally, setting the skb 'protocol' field would be handled by a 559 /* Normally, setting the skb 'protocol' field would be handled by a
546 * call to eth_type_trans(), but it assumes there's a sending 560 * call to eth_type_trans(), but it assumes there's a sending
547 * device, which we may not have. */ 561 * device, which we may not have. */
548 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) 562 if (eth_proto_is_802_3(eth->h_proto))
549 packet->protocol = eth->h_proto; 563 packet->protocol = eth->h_proto;
550 else 564 else
551 packet->protocol = htons(ETH_P_802_2); 565 packet->protocol = htons(ETH_P_802_2);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4ec4a480b147..cd691e935e08 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -116,6 +116,8 @@ struct ovs_skb_cb {
116struct dp_upcall_info { 116struct dp_upcall_info {
117 const struct ovs_tunnel_info *egress_tun_info; 117 const struct ovs_tunnel_info *egress_tun_info;
118 const struct nlattr *userdata; 118 const struct nlattr *userdata;
119 const struct nlattr *actions;
120 int actions_len;
119 u32 portid; 121 u32 portid;
120 u8 cmd; 122 u8 cmd;
121}; 123};
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2dacc7b5af23..bc7b0aba994a 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -332,7 +332,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
332 proto = *(__be16 *) skb->data; 332 proto = *(__be16 *) skb->data;
333 __skb_pull(skb, sizeof(__be16)); 333 __skb_pull(skb, sizeof(__be16));
334 334
335 if (ntohs(proto) >= ETH_P_802_3_MIN) 335 if (eth_proto_is_802_3(proto))
336 return proto; 336 return proto;
337 337
338 if (skb->len < sizeof(struct llc_snap_hdr)) 338 if (skb->len < sizeof(struct llc_snap_hdr))
@@ -349,7 +349,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
349 349
350 __skb_pull(skb, sizeof(struct llc_snap_hdr)); 350 __skb_pull(skb, sizeof(struct llc_snap_hdr));
351 351
352 if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) 352 if (eth_proto_is_802_3(llc->ethertype))
353 return llc->ethertype; 353 return llc->ethertype;
354 354
355 return htons(ETH_P_802_2); 355 return htons(ETH_P_802_2);
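These conversions are meant to be behavior-preserving: eth_proto_is_802_3() encapsulates the same "is this a real EtherType?" test that the open-coded ntohs() comparison performed (values below 0x0600 are 802.3 length fields, not EtherTypes). Functionally it reduces to the sketch below; the helper's actual in-tree version also folds the byte swap into a masked compare, but the observable result is the same:

#include <stdbool.h>
#include <stdio.h>
#include <arpa/inet.h>

#define ETH_P_802_3_MIN 0x0600  /* smallest valid EtherType */

typedef unsigned short be16;    /* stand-in for __be16 */

static bool proto_is_802_3(be16 proto) /* hypothetical name */
{
        return ntohs(proto) >= ETH_P_802_3_MIN;
}

int main(void)
{
        printf("%d\n", proto_is_802_3(htons(0x0800))); /* IPv4: 1 */
        printf("%d\n", proto_is_802_3(htons(0x0042))); /* 802.3 length: 0 */
        return 0;
}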
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c691b1a1eee0..624e41c4267f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -816,7 +816,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
816 if (is_mask) { 816 if (is_mask) {
817 /* Always exact match EtherType. */ 817 /* Always exact match EtherType. */
818 eth_type = htons(0xffff); 818 eth_type = htons(0xffff);
819 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 819 } else if (!eth_proto_is_802_3(eth_type)) {
820 OVS_NLERR(log, "EtherType %x is less than min %x", 820 OVS_NLERR(log, "EtherType %x is less than min %x",
821 ntohs(eth_type), ETH_P_802_3_MIN); 821 ntohs(eth_type), ETH_P_802_3_MIN);
822 return -EINVAL; 822 return -EINVAL;
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index bf02fd5808c9..208c576bd1b6 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -46,11 +46,6 @@ static inline struct geneve_port *geneve_vport(const struct vport *vport)
46 return vport_priv(vport); 46 return vport_priv(vport);
47} 47}
48 48
49static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
50{
51 return (struct genevehdr *)(udp_hdr(skb) + 1);
52}
53
54/* Convert 64 bit tunnel ID to 24 bit VNI. */ 49/* Convert 64 bit tunnel ID to 24 bit VNI. */
55static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 50static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
56{ 51{
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5989c6ee551..c9e8741226c6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -543,15 +543,11 @@ static void prb_init_blk_timer(struct packet_sock *po,
543 pkc->retire_blk_timer.expires = jiffies; 543 pkc->retire_blk_timer.expires = jiffies;
544} 544}
545 545
546static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) 546static void prb_setup_retire_blk_timer(struct packet_sock *po)
547{ 547{
548 struct tpacket_kbdq_core *pkc; 548 struct tpacket_kbdq_core *pkc;
549 549
550 if (tx_ring) 550 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
551 BUG();
552
553 pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) :
554 GET_PBDQC_FROM_RB(&po->rx_ring);
555 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); 551 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
556} 552}
557 553
@@ -607,7 +603,7 @@ static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
607static void init_prb_bdqc(struct packet_sock *po, 603static void init_prb_bdqc(struct packet_sock *po,
608 struct packet_ring_buffer *rb, 604 struct packet_ring_buffer *rb,
609 struct pgv *pg_vec, 605 struct pgv *pg_vec,
610 union tpacket_req_u *req_u, int tx_ring) 606 union tpacket_req_u *req_u)
611{ 607{
612 struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); 608 struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
613 struct tpacket_block_desc *pbd; 609 struct tpacket_block_desc *pbd;
@@ -634,7 +630,7 @@ static void init_prb_bdqc(struct packet_sock *po,
634 630
635 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); 631 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
636 prb_init_ft_ops(p1, req_u); 632 prb_init_ft_ops(p1, req_u);
637 prb_setup_retire_blk_timer(po, tx_ring); 633 prb_setup_retire_blk_timer(po);
638 prb_open_block(p1, pbd); 634 prb_open_block(p1, pbd);
639} 635}
640 636
@@ -1234,27 +1230,81 @@ static void packet_free_pending(struct packet_sock *po)
1234 free_percpu(po->tx_ring.pending_refcnt); 1230 free_percpu(po->tx_ring.pending_refcnt);
1235} 1231}
1236 1232
1237static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) 1233#define ROOM_POW_OFF 2
1234#define ROOM_NONE 0x0
1235#define ROOM_LOW 0x1
1236#define ROOM_NORMAL 0x2
1237
1238static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
1239{
1240 int idx, len;
1241
1242 len = po->rx_ring.frame_max + 1;
1243 idx = po->rx_ring.head;
1244 if (pow_off)
1245 idx += len >> pow_off;
1246 if (idx >= len)
1247 idx -= len;
1248 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1249}
1250
1251static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
1252{
1253 int idx, len;
1254
1255 len = po->rx_ring.prb_bdqc.knum_blocks;
1256 idx = po->rx_ring.prb_bdqc.kactive_blk_num;
1257 if (pow_off)
1258 idx += len >> pow_off;
1259 if (idx >= len)
1260 idx -= len;
1261 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1262}
1263
1264static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1238{ 1265{
1239 struct sock *sk = &po->sk; 1266 struct sock *sk = &po->sk;
1240 bool has_room; 1267 int ret = ROOM_NONE;
1268
1269 if (po->prot_hook.func != tpacket_rcv) {
1270 int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1271 - (skb ? skb->truesize : 0);
1272 if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
1273 return ROOM_NORMAL;
1274 else if (avail > 0)
1275 return ROOM_LOW;
1276 else
1277 return ROOM_NONE;
1278 }
1241 1279
1242 if (po->prot_hook.func != tpacket_rcv) 1280 if (po->tp_version == TPACKET_V3) {
1243 return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) 1281 if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1244 <= sk->sk_rcvbuf; 1282 ret = ROOM_NORMAL;
1283 else if (__tpacket_v3_has_room(po, 0))
1284 ret = ROOM_LOW;
1285 } else {
1286 if (__tpacket_has_room(po, ROOM_POW_OFF))
1287 ret = ROOM_NORMAL;
1288 else if (__tpacket_has_room(po, 0))
1289 ret = ROOM_LOW;
1290 }
1245 1291
1246 spin_lock(&sk->sk_receive_queue.lock); 1292 return ret;
1247 if (po->tp_version == TPACKET_V3) 1293}
1248 has_room = prb_lookup_block(po, &po->rx_ring,
1249 po->rx_ring.prb_bdqc.kactive_blk_num,
1250 TP_STATUS_KERNEL);
1251 else
1252 has_room = packet_lookup_frame(po, &po->rx_ring,
1253 po->rx_ring.head,
1254 TP_STATUS_KERNEL);
1255 spin_unlock(&sk->sk_receive_queue.lock);
1256 1294
1257 return has_room; 1295static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1296{
1297 int ret;
1298 bool has_room;
1299
1300 spin_lock_bh(&po->sk.sk_receive_queue.lock);
1301 ret = __packet_rcv_has_room(po, skb);
1302 has_room = ret == ROOM_NORMAL;
1303 if (po->pressure == has_room)
1304 po->pressure = !has_room;
1305 spin_unlock_bh(&po->sk.sk_receive_queue.lock);
1306
1307 return ret;
1258} 1308}
1259 1309
1260static void packet_sock_destruct(struct sock *sk) 1310static void packet_sock_destruct(struct sock *sk)
@@ -1272,14 +1322,18 @@ static void packet_sock_destruct(struct sock *sk)
1272 sk_refcnt_debug_dec(sk); 1322 sk_refcnt_debug_dec(sk);
1273} 1323}
1274 1324
1275static int fanout_rr_next(struct packet_fanout *f, unsigned int num) 1325static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
1276{ 1326{
1277 int x = atomic_read(&f->rr_cur) + 1; 1327 u32 rxhash;
1328 int i, count = 0;
1278 1329
1279 if (x >= num) 1330 rxhash = skb_get_hash(skb);
1280 x = 0; 1331 for (i = 0; i < ROLLOVER_HLEN; i++)
1332 if (po->rollover->history[i] == rxhash)
1333 count++;
1281 1334
1282 return x; 1335 po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
1336 return count > (ROLLOVER_HLEN >> 1);
1283} 1337}
1284 1338
1285static unsigned int fanout_demux_hash(struct packet_fanout *f, 1339static unsigned int fanout_demux_hash(struct packet_fanout *f,
@@ -1293,13 +1347,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f,
1293 struct sk_buff *skb, 1347 struct sk_buff *skb,
1294 unsigned int num) 1348 unsigned int num)
1295{ 1349{
1296 int cur, old; 1350 unsigned int val = atomic_inc_return(&f->rr_cur);
1297 1351
1298 cur = atomic_read(&f->rr_cur); 1352 return val % num;
1299 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1300 fanout_rr_next(f, num))) != cur)
1301 cur = old;
1302 return cur;
1303} 1353}
1304 1354
1305static unsigned int fanout_demux_cpu(struct packet_fanout *f, 1355static unsigned int fanout_demux_cpu(struct packet_fanout *f,
@@ -1318,22 +1368,40 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1318 1368
1319static unsigned int fanout_demux_rollover(struct packet_fanout *f, 1369static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1320 struct sk_buff *skb, 1370 struct sk_buff *skb,
1321 unsigned int idx, unsigned int skip, 1371 unsigned int idx, bool try_self,
1322 unsigned int num) 1372 unsigned int num)
1323{ 1373{
1324 unsigned int i, j; 1374 struct packet_sock *po, *po_next, *po_skip = NULL;
1375 unsigned int i, j, room = ROOM_NONE;
1325 1376
1326 i = j = min_t(int, f->next[idx], num - 1); 1377 po = pkt_sk(f->arr[idx]);
1378
1379 if (try_self) {
1380 room = packet_rcv_has_room(po, skb);
1381 if (room == ROOM_NORMAL ||
1382 (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
1383 return idx;
1384 po_skip = po;
1385 }
1386
1387 i = j = min_t(int, po->rollover->sock, num - 1);
1327 do { 1388 do {
1328 if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { 1389 po_next = pkt_sk(f->arr[i]);
1390 if (po_next != po_skip && !po_next->pressure &&
1391 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
1329 if (i != j) 1392 if (i != j)
1330 f->next[idx] = i; 1393 po->rollover->sock = i;
1394 atomic_long_inc(&po->rollover->num);
1395 if (room == ROOM_LOW)
1396 atomic_long_inc(&po->rollover->num_huge);
1331 return i; 1397 return i;
1332 } 1398 }
1399
1333 if (++i == num) 1400 if (++i == num)
1334 i = 0; 1401 i = 0;
1335 } while (i != j); 1402 } while (i != j);
1336 1403
1404 atomic_long_inc(&po->rollover->num_failed);
1337 return idx; 1405 return idx;
1338} 1406}
1339 1407
@@ -1353,7 +1421,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1353 struct packet_type *pt, struct net_device *orig_dev) 1421 struct packet_type *pt, struct net_device *orig_dev)
1354{ 1422{
1355 struct packet_fanout *f = pt->af_packet_priv; 1423 struct packet_fanout *f = pt->af_packet_priv;
1356 unsigned int num = f->num_members; 1424 unsigned int num = READ_ONCE(f->num_members);
1357 struct packet_sock *po; 1425 struct packet_sock *po;
1358 unsigned int idx; 1426 unsigned int idx;
1359 1427
@@ -1386,17 +1454,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1386 idx = fanout_demux_qm(f, skb, num); 1454 idx = fanout_demux_qm(f, skb, num);
1387 break; 1455 break;
1388 case PACKET_FANOUT_ROLLOVER: 1456 case PACKET_FANOUT_ROLLOVER:
1389 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); 1457 idx = fanout_demux_rollover(f, skb, 0, false, num);
1390 break; 1458 break;
1391 } 1459 }
1392 1460
1393 po = pkt_sk(f->arr[idx]); 1461 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
1394 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && 1462 idx = fanout_demux_rollover(f, skb, idx, true, num);
1395 unlikely(!packet_rcv_has_room(po, skb))) {
1396 idx = fanout_demux_rollover(f, skb, idx, idx, num);
1397 po = pkt_sk(f->arr[idx]);
1398 }
1399 1463
1464 po = pkt_sk(f->arr[idx]);
1400 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); 1465 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1401} 1466}
1402 1467
@@ -1467,6 +1532,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1467 if (po->fanout) 1532 if (po->fanout)
1468 return -EALREADY; 1533 return -EALREADY;
1469 1534
1535 if (type == PACKET_FANOUT_ROLLOVER ||
1536 (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
1537 po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
1538 if (!po->rollover)
1539 return -ENOMEM;
1540 atomic_long_set(&po->rollover->num, 0);
1541 atomic_long_set(&po->rollover->num_huge, 0);
1542 atomic_long_set(&po->rollover->num_failed, 0);
1543 }
1544
1470 mutex_lock(&fanout_mutex); 1545 mutex_lock(&fanout_mutex);
1471 match = NULL; 1546 match = NULL;
1472 list_for_each_entry(f, &fanout_list, list) { 1547 list_for_each_entry(f, &fanout_list, list) {
@@ -1515,6 +1590,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1515 } 1590 }
1516out: 1591out:
1517 mutex_unlock(&fanout_mutex); 1592 mutex_unlock(&fanout_mutex);
1593 if (err) {
1594 kfree(po->rollover);
1595 po->rollover = NULL;
1596 }
1518 return err; 1597 return err;
1519} 1598}
1520 1599
@@ -1536,6 +1615,9 @@ static void fanout_release(struct sock *sk)
1536 kfree(f); 1615 kfree(f);
1537 } 1616 }
1538 mutex_unlock(&fanout_mutex); 1617 mutex_unlock(&fanout_mutex);
1618
1619 if (po->rollover)
1620 kfree_rcu(po->rollover, rcu);
1539} 1621}
1540 1622
1541static const struct proto_ops packet_ops; 1623static const struct proto_ops packet_ops;
@@ -2835,7 +2917,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
2835 sock->state = SS_UNCONNECTED; 2917 sock->state = SS_UNCONNECTED;
2836 2918
2837 err = -ENOBUFS; 2919 err = -ENOBUFS;
2838 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); 2920 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
2839 if (sk == NULL) 2921 if (sk == NULL)
2840 goto out; 2922 goto out;
2841 2923
@@ -2865,6 +2947,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
2865 2947
2866 spin_lock_init(&po->bind_lock); 2948 spin_lock_init(&po->bind_lock);
2867 mutex_init(&po->pg_vec_lock); 2949 mutex_init(&po->pg_vec_lock);
2950 po->rollover = NULL;
2868 po->prot_hook.func = packet_rcv; 2951 po->prot_hook.func = packet_rcv;
2869 2952
2870 if (sock->type == SOCK_PACKET) 2953 if (sock->type == SOCK_PACKET)
@@ -2942,6 +3025,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
2942 if (skb == NULL) 3025 if (skb == NULL)
2943 goto out; 3026 goto out;
2944 3027
3028 if (pkt_sk(sk)->pressure)
3029 packet_rcv_has_room(pkt_sk(sk), NULL);
3030
2945 if (pkt_sk(sk)->has_vnet_hdr) { 3031 if (pkt_sk(sk)->has_vnet_hdr) {
2946 struct virtio_net_hdr vnet_hdr = { 0 }; 3032 struct virtio_net_hdr vnet_hdr = { 0 };
2947 3033
@@ -3485,6 +3571,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3485 struct packet_sock *po = pkt_sk(sk); 3571 struct packet_sock *po = pkt_sk(sk);
3486 void *data = &val; 3572 void *data = &val;
3487 union tpacket_stats_u st; 3573 union tpacket_stats_u st;
3574 struct tpacket_rollover_stats rstats;
3488 3575
3489 if (level != SOL_PACKET) 3576 if (level != SOL_PACKET)
3490 return -ENOPROTOOPT; 3577 return -ENOPROTOOPT;
@@ -3560,6 +3647,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3560 ((u32)po->fanout->flags << 24)) : 3647 ((u32)po->fanout->flags << 24)) :
3561 0); 3648 0);
3562 break; 3649 break;
3650 case PACKET_ROLLOVER_STATS:
3651 if (!po->rollover)
3652 return -EINVAL;
3653 rstats.tp_all = atomic_long_read(&po->rollover->num);
3654 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
3655 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
3656 data = &rstats;
3657 lv = sizeof(rstats);
3658 break;
3563 case PACKET_TX_HAS_OFF: 3659 case PACKET_TX_HAS_OFF:
3564 val = po->tp_tx_has_off; 3660 val = po->tp_tx_has_off;
3565 break; 3661 break;
@@ -3697,6 +3793,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
3697 TP_STATUS_KERNEL)) 3793 TP_STATUS_KERNEL))
3698 mask |= POLLIN | POLLRDNORM; 3794 mask |= POLLIN | POLLRDNORM;
3699 } 3795 }
3796 if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
3797 po->pressure = 0;
3700 spin_unlock_bh(&sk->sk_receive_queue.lock); 3798 spin_unlock_bh(&sk->sk_receive_queue.lock);
3701 spin_lock_bh(&sk->sk_write_queue.lock); 3799 spin_lock_bh(&sk->sk_write_queue.lock);
3702 if (po->tx_ring.pg_vec) { 3800 if (po->tx_ring.pg_vec) {
@@ -3886,7 +3984,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
3886 * it above but just being paranoid 3984 * it above but just being paranoid
3887 */ 3985 */
3888 if (!tx_ring) 3986 if (!tx_ring)
3889 init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); 3987 init_prb_bdqc(po, rb, pg_vec, req_u);
3890 break; 3988 break;
3891 default: 3989 default:
3892 break; 3990 break;
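Two of the af_packet changes above are easy to miss: the load-balancing demux now derives its index from a single atomic_inc_return() modulo the member count, replacing a cmpxchg retry loop that could spin under contention, and rollover decisions are driven by the new ROOM_* levels. The simplified round-robin pick boils down to this, sketched with C11 atomics standing in for the kernel's atomic_t:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint rr_cur;      /* mirrors f->rr_cur */

static unsigned int demux_lb(unsigned int num)
{
        /* atomic_inc_return() equivalent: fetch-and-add, plus one */
        unsigned int val = atomic_fetch_add(&rr_cur, 1) + 1;

        return val % num;       /* unsigned wraparound is harmless here */
}

int main(void)
{
        for (int i = 0; i < 5; i++)
                printf("%u ", demux_lb(4));     /* 1 2 3 0 1 */
        printf("\n");
        return 0;
}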
diff --git a/net/packet/internal.h b/net/packet/internal.h
index fe6e20caea1d..e20b3e8829b8 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -82,12 +82,21 @@ struct packet_fanout {
82 atomic_t rr_cur; 82 atomic_t rr_cur;
83 struct list_head list; 83 struct list_head list;
84 struct sock *arr[PACKET_FANOUT_MAX]; 84 struct sock *arr[PACKET_FANOUT_MAX];
85 int next[PACKET_FANOUT_MAX];
86 spinlock_t lock; 85 spinlock_t lock;
87 atomic_t sk_ref; 86 atomic_t sk_ref;
88 struct packet_type prot_hook ____cacheline_aligned_in_smp; 87 struct packet_type prot_hook ____cacheline_aligned_in_smp;
89}; 88};
90 89
90struct packet_rollover {
91 int sock;
92 struct rcu_head rcu;
93 atomic_long_t num;
94 atomic_long_t num_huge;
95 atomic_long_t num_failed;
96#define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32))
97 u32 history[ROLLOVER_HLEN] ____cacheline_aligned;
98} ____cacheline_aligned_in_smp;
99
91struct packet_sock { 100struct packet_sock {
92 /* struct sock has to be the first member of packet_sock */ 101 /* struct sock has to be the first member of packet_sock */
93 struct sock sk; 102 struct sock sk;
@@ -102,8 +111,10 @@ struct packet_sock {
102 auxdata:1, 111 auxdata:1,
103 origdev:1, 112 origdev:1,
104 has_vnet_hdr:1; 113 has_vnet_hdr:1;
114 int pressure;
105 int ifindex; /* bound device */ 115 int ifindex; /* bound device */
106 __be16 num; 116 __be16 num;
117 struct packet_rollover *rollover;
107 struct packet_mclist *mclist; 118 struct packet_mclist *mclist;
108 atomic_t mapped; 119 atomic_t mapped;
109 enum tpacket_versions tp_version; 120 enum tpacket_versions tp_version;
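To make the new three-level room accounting concrete: for a socket delivering via packet_rcv() (no rx ring), the level comes from receive-buffer headroom, and ROOM_NORMAL demands more than a quarter of sk_rcvbuf free (the ROOM_POW_OFF shift); the ring variants ask the same question of a frame or block a quarter of the ring ahead of the head. A standalone sketch of the non-ring arithmetic, with illustrative numbers:

#include <stdio.h>

#define ROOM_NONE       0x0
#define ROOM_LOW        0x1
#define ROOM_NORMAL     0x2
#define ROOM_POW_OFF    2       /* "normal" needs more than rcvbuf >> 2 spare */

static int room_level(int rcvbuf, int rmem_alloc, int truesize)
{
        int avail = rcvbuf - rmem_alloc - truesize;

        if (avail > (rcvbuf >> ROOM_POW_OFF))
                return ROOM_NORMAL;
        return avail > 0 ? ROOM_LOW : ROOM_NONE;
}

int main(void)
{
        /* 212992 is a common sk_rcvbuf default; all numbers illustrative */
        printf("%d\n", room_level(212992, 10000, 2048));  /* 2: NORMAL */
        printf("%d\n", room_level(212992, 170000, 2048)); /* 1: LOW */
        printf("%d\n", room_level(212992, 212000, 2048)); /* 0: NONE */
        return 0;
}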
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 32ab87d34828..10d42f3220ab 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
97 goto out; 97 goto out;
98 } 98 }
99 99
100 sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); 100 sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern);
101 if (sk == NULL) { 101 if (sk == NULL) {
102 err = -ENOMEM; 102 err = -ENOMEM;
103 goto out; 103 goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 6de2aeb98a1f..850a86cde0b3 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
845 } 845 }
846 846
847 /* Create a new to-be-accepted sock */ 847 /* Create a new to-be-accepted sock */
848 newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); 848 newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
849 if (!newsk) { 849 if (!newsk) {
850 pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); 850 pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
851 err = -ENOBUFS; 851 err = -ENOBUFS;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 10443377fb9d..896834cd3b9a 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -40,15 +40,6 @@
40 40
41#include "rds.h" 41#include "rds.h"
42 42
43char *rds_str_array(char **array, size_t elements, size_t index)
44{
45 if ((index < elements) && array[index])
46 return array[index];
47 else
48 return "unknown";
49}
50EXPORT_SYMBOL(rds_str_array);
51
52/* this is just used for stats gathering :/ */ 43/* this is just used for stats gathering :/ */
53static DEFINE_SPINLOCK(rds_sock_lock); 44static DEFINE_SPINLOCK(rds_sock_lock);
54static unsigned long rds_sock_count; 45static unsigned long rds_sock_count;
@@ -270,6 +261,28 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
270 return ret; 261 return ret;
271} 262}
272 263
264static int rds_set_transport(struct rds_sock *rs, char __user *optval,
265 int optlen)
266{
267 int t_type;
268
269 if (rs->rs_transport)
270 return -EOPNOTSUPP; /* previously attached to transport */
271
272 if (optlen != sizeof(int))
273 return -EINVAL;
274
275 if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
276 return -EFAULT;
277
278 if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
279 return -EINVAL;
280
281 rs->rs_transport = rds_trans_get(t_type);
282
283 return rs->rs_transport ? 0 : -ENOPROTOOPT;
284}
285
273static int rds_setsockopt(struct socket *sock, int level, int optname, 286static int rds_setsockopt(struct socket *sock, int level, int optname,
274 char __user *optval, unsigned int optlen) 287 char __user *optval, unsigned int optlen)
275{ 288{
@@ -300,6 +313,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
300 case RDS_CONG_MONITOR: 313 case RDS_CONG_MONITOR:
301 ret = rds_cong_monitor(rs, optval, optlen); 314 ret = rds_cong_monitor(rs, optval, optlen);
302 break; 315 break;
316 case SO_RDS_TRANSPORT:
317 lock_sock(sock->sk);
318 ret = rds_set_transport(rs, optval, optlen);
319 release_sock(sock->sk);
320 break;
303 default: 321 default:
304 ret = -ENOPROTOOPT; 322 ret = -ENOPROTOOPT;
305 } 323 }
@@ -312,6 +330,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
312{ 330{
313 struct rds_sock *rs = rds_sk_to_rs(sock->sk); 331 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
314 int ret = -ENOPROTOOPT, len; 332 int ret = -ENOPROTOOPT, len;
333 int trans;
315 334
316 if (level != SOL_RDS) 335 if (level != SOL_RDS)
317 goto out; 336 goto out;
@@ -337,6 +356,19 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
337 else 356 else
338 ret = 0; 357 ret = 0;
339 break; 358 break;
359 case SO_RDS_TRANSPORT:
360 if (len < sizeof(int)) {
361 ret = -EINVAL;
362 break;
363 }
364 trans = (rs->rs_transport ? rs->rs_transport->t_type :
365 RDS_TRANS_NONE); /* unbound */
366 if (put_user(trans, (int __user *)optval) ||
367 put_user(sizeof(int), optlen))
368 ret = -EFAULT;
369 else
370 ret = 0;
371 break;
340 default: 372 default:
341 break; 373 break;
342 } 374 }
@@ -440,7 +472,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
440 if (sock->type != SOCK_SEQPACKET || protocol) 472 if (sock->type != SOCK_SEQPACKET || protocol)
441 return -ESOCKTNOSUPPORT; 473 return -ESOCKTNOSUPPORT;
442 474
443 sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); 475 sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
444 if (!sk) 476 if (!sk)
445 return -ENOMEM; 477 return -ENOMEM;
446 478
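From user space, the new option is set once, before bind(), and can be read back with getsockopt(); a minimal sketch, assuming the SO_RDS_TRANSPORT and RDS_TRANS_* constants from the matching uapi update to linux/rds.h (the AF_RDS/SOL_RDS fallbacks are only for older headers):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/rds.h>          /* SO_RDS_TRANSPORT, RDS_TRANS_* (new uapi) */

#ifndef AF_RDS
#define AF_RDS 21
#endif
#ifndef SOL_RDS
#define SOL_RDS 276
#endif

int main(void)
{
        int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);
        int t = RDS_TRANS_TCP;
        socklen_t len = sizeof(t);

        if (fd < 0) {
                perror("socket");
                return 1;
        }
        /* Must come before bind(); a second attempt gets EOPNOTSUPP. */
        if (setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, &t, sizeof(t)) < 0)
                perror("setsockopt");
        if (getsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, &t, &len) == 0)
                printf("transport %d\n", t); /* RDS_TRANS_NONE if unbound */
        return 0;
}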
diff --git a/net/rds/bind.c b/net/rds/bind.c
index a2e6562da751..4ebd29c128b6 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,6 +181,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
181 if (ret) 181 if (ret)
182 goto out; 182 goto out;
183 183
184 if (rs->rs_transport) { /* previously bound */
185 ret = 0;
186 goto out;
187 }
184 trans = rds_trans_get_preferred(sin->sin_addr.s_addr); 188 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
185 if (!trans) { 189 if (!trans) {
186 ret = -EADDRNOTAVAIL; 190 ret = -EADDRNOTAVAIL;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index c36d713229e0..86d88ec5d556 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -235,28 +235,34 @@ extern struct workqueue_struct *rds_ib_wq;
235 * doesn't define it. 235 * doesn't define it.
236 */ 236 */
237static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev, 237static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
238 struct scatterlist *sg, unsigned int sg_dma_len, int direction) 238 struct scatterlist *sglist,
239 unsigned int sg_dma_len,
240 int direction)
239{ 241{
242 struct scatterlist *sg;
240 unsigned int i; 243 unsigned int i;
241 244
242 for (i = 0; i < sg_dma_len; ++i) { 245 for_each_sg(sglist, sg, sg_dma_len, i) {
243 ib_dma_sync_single_for_cpu(dev, 246 ib_dma_sync_single_for_cpu(dev,
244 ib_sg_dma_address(dev, &sg[i]), 247 ib_sg_dma_address(dev, sg),
245 ib_sg_dma_len(dev, &sg[i]), 248 ib_sg_dma_len(dev, sg),
246 direction); 249 direction);
247 } 250 }
248} 251}
249#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu 252#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
250 253
251static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev, 254static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
252 struct scatterlist *sg, unsigned int sg_dma_len, int direction) 255 struct scatterlist *sglist,
256 unsigned int sg_dma_len,
257 int direction)
253{ 258{
259 struct scatterlist *sg;
254 unsigned int i; 260 unsigned int i;
255 261
256 for (i = 0; i < sg_dma_len; ++i) { 262 for_each_sg(sglist, sg, sg_dma_len, i) {
257 ib_dma_sync_single_for_device(dev, 263 ib_dma_sync_single_for_device(dev,
258 ib_sg_dma_address(dev, &sg[i]), 264 ib_sg_dma_address(dev, sg),
259 ib_sg_dma_len(dev, &sg[i]), 265 ib_sg_dma_len(dev, sg),
260 direction); 266 direction);
261 } 267 }
262} 268}
@@ -339,7 +345,6 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
339extern wait_queue_head_t rds_ib_ring_empty_wait; 345extern wait_queue_head_t rds_ib_ring_empty_wait;
340 346
341/* ib_send.c */ 347/* ib_send.c */
342char *rds_ib_wc_status_str(enum ib_wc_status status);
343void rds_ib_xmit_complete(struct rds_connection *conn); 348void rds_ib_xmit_complete(struct rds_connection *conn);
344int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 349int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
345 unsigned int hdr_off, unsigned int sg, unsigned int off); 350 unsigned int hdr_off, unsigned int sg, unsigned int off);
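The for_each_sg() conversion above is a correctness fix, not a style one: a scatterlist may be chained across several array chunks, so stepping with &sg[i] walks straight past the end of a chunk instead of following the chain link, while for_each_sg()/sg_next() hops the link. The pointer-chasing difference, reduced to a standalone toy (entry/chain are stand-ins for the real sg machinery):

#include <stdio.h>
#include <stddef.h>

struct entry {
        int payload;
        struct entry *chain;    /* non-NULL marks a link to the next chunk */
};

static struct entry *entry_next(struct entry *e)
{
        e++;                    /* sg_next()-style step: advance, ... */
        if (e->chain)
                e = e->chain;   /* ...then follow a chain marker */
        return e;
}

int main(void)
{
        struct entry chunk2[2] = { { 3, NULL }, { 4, NULL } };
        struct entry chunk1[3] = { { 1, NULL }, { 2, NULL }, { 0, chunk2 } };
        struct entry *e = chunk1;
        int i;

        /* 4 payload entries spread over two chunks joined by a chain slot */
        for (i = 0; i < 4; i++) {
                printf("%d ", e->payload);
                if (i < 3)
                        e = entry_next(e);
        }
        printf("\n");           /* prints 1 2 3 4; indexing &chunk1[i] would
                                 * have read the chain slot's 0 and then
                                 * overrun chunk1 */
        return 0;
}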
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 8a09ee7db3c1..0da2a45b33bd 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -39,36 +39,6 @@
39#include "rds.h" 39#include "rds.h"
40#include "ib.h" 40#include "ib.h"
41 41
42static char *rds_ib_event_type_strings[] = {
43#define RDS_IB_EVENT_STRING(foo) \
44 [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
45 RDS_IB_EVENT_STRING(CQ_ERR),
46 RDS_IB_EVENT_STRING(QP_FATAL),
47 RDS_IB_EVENT_STRING(QP_REQ_ERR),
48 RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
49 RDS_IB_EVENT_STRING(COMM_EST),
50 RDS_IB_EVENT_STRING(SQ_DRAINED),
51 RDS_IB_EVENT_STRING(PATH_MIG),
52 RDS_IB_EVENT_STRING(PATH_MIG_ERR),
53 RDS_IB_EVENT_STRING(DEVICE_FATAL),
54 RDS_IB_EVENT_STRING(PORT_ACTIVE),
55 RDS_IB_EVENT_STRING(PORT_ERR),
56 RDS_IB_EVENT_STRING(LID_CHANGE),
57 RDS_IB_EVENT_STRING(PKEY_CHANGE),
58 RDS_IB_EVENT_STRING(SM_CHANGE),
59 RDS_IB_EVENT_STRING(SRQ_ERR),
60 RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
61 RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
62 RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
63#undef RDS_IB_EVENT_STRING
64};
65
66static char *rds_ib_event_str(enum ib_event_type type)
67{
68 return rds_str_array(rds_ib_event_type_strings,
69 ARRAY_SIZE(rds_ib_event_type_strings), type);
70};
71
72/* 42/*
73 * Set the selected protocol version 43 * Set the selected protocol version
74 */ 44 */
@@ -243,7 +213,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
243static void rds_ib_cq_event_handler(struct ib_event *event, void *data) 213static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
244{ 214{
245 rdsdebug("event %u (%s) data %p\n", 215 rdsdebug("event %u (%s) data %p\n",
246 event->event, rds_ib_event_str(event->event), data); 216 event->event, ib_event_msg(event->event), data);
247} 217}
248 218
249static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 219static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -252,7 +222,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
252 struct rds_ib_connection *ic = conn->c_transport_data; 222 struct rds_ib_connection *ic = conn->c_transport_data;
253 223
254 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event, 224 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
255 rds_ib_event_str(event->event)); 225 ib_event_msg(event->event));
256 226
257 switch (event->event) { 227 switch (event->event) {
258 case IB_EVENT_COMM_EST: 228 case IB_EVENT_COMM_EST:
@@ -261,7 +231,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
261 default: 231 default:
262 rdsdebug("Fatal QP Event %u (%s) " 232 rdsdebug("Fatal QP Event %u (%s) "
263 "- connection %pI4->%pI4, reconnecting\n", 233 "- connection %pI4->%pI4, reconnecting\n",
264 event->event, rds_ib_event_str(event->event), 234 event->event, ib_event_msg(event->event),
265 &conn->c_laddr, &conn->c_faddr); 235 &conn->c_laddr, &conn->c_faddr);
266 rds_conn_drop(conn); 236 rds_conn_drop(conn);
267 break; 237 break;
@@ -277,6 +247,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
277 struct rds_ib_connection *ic = conn->c_transport_data; 247 struct rds_ib_connection *ic = conn->c_transport_data;
278 struct ib_device *dev = ic->i_cm_id->device; 248 struct ib_device *dev = ic->i_cm_id->device;
279 struct ib_qp_init_attr attr; 249 struct ib_qp_init_attr attr;
250 struct ib_cq_init_attr cq_attr = {};
280 struct rds_ib_device *rds_ibdev; 251 struct rds_ib_device *rds_ibdev;
281 int ret; 252 int ret;
282 253
@@ -300,9 +271,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
300 ic->i_pd = rds_ibdev->pd; 271 ic->i_pd = rds_ibdev->pd;
301 ic->i_mr = rds_ibdev->mr; 272 ic->i_mr = rds_ibdev->mr;
302 273
274 cq_attr.cqe = ic->i_send_ring.w_nr + 1;
303 ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, 275 ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler,
304 rds_ib_cq_event_handler, conn, 276 rds_ib_cq_event_handler, conn,
305 ic->i_send_ring.w_nr + 1, 0); 277 &cq_attr);
306 if (IS_ERR(ic->i_send_cq)) { 278 if (IS_ERR(ic->i_send_cq)) {
307 ret = PTR_ERR(ic->i_send_cq); 279 ret = PTR_ERR(ic->i_send_cq);
308 ic->i_send_cq = NULL; 280 ic->i_send_cq = NULL;
@@ -310,9 +282,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
310 goto out; 282 goto out;
311 } 283 }
312 284
285 cq_attr.cqe = ic->i_recv_ring.w_nr;
313 ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, 286 ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler,
314 rds_ib_cq_event_handler, conn, 287 rds_ib_cq_event_handler, conn,
315 ic->i_recv_ring.w_nr, 0); 288 &cq_attr);
316 if (IS_ERR(ic->i_recv_cq)) { 289 if (IS_ERR(ic->i_recv_cq)) {
317 ret = PTR_ERR(ic->i_recv_cq); 290 ret = PTR_ERR(ic->i_recv_cq);
318 ic->i_recv_cq = NULL; 291 ic->i_recv_cq = NULL;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 273b8bff6ba4..657ba9f5d308 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
759 } 759 }
760 760
761 ibmr = rds_ib_alloc_fmr(rds_ibdev); 761 ibmr = rds_ib_alloc_fmr(rds_ibdev);
762 if (IS_ERR(ibmr)) 762 if (IS_ERR(ibmr)) {
763 rds_ib_dev_put(rds_ibdev);
763 return ibmr; 764 return ibmr;
765 }
764 766
765 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); 767 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
766 if (ret == 0) 768 if (ret == 0)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 1b981a4e42c2..cac5b4506ee3 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -956,7 +956,7 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
956 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 956 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
957 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", 957 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
958 (unsigned long long)wc.wr_id, wc.status, 958 (unsigned long long)wc.wr_id, wc.status,
959 rds_ib_wc_status_str(wc.status), wc.byte_len, 959 ib_wc_status_msg(wc.status), wc.byte_len,
960 be32_to_cpu(wc.ex.imm_data)); 960 be32_to_cpu(wc.ex.imm_data));
961 rds_ib_stats_inc(s_ib_rx_cq_event); 961 rds_ib_stats_inc(s_ib_rx_cq_event);
962 962
@@ -978,7 +978,7 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
978 "status %u (%s), disconnecting and " 978 "status %u (%s), disconnecting and "
979 "reconnecting\n", &conn->c_faddr, 979 "reconnecting\n", &conn->c_faddr,
980 wc.status, 980 wc.status,
981 rds_ib_wc_status_str(wc.status)); 981 ib_wc_status_msg(wc.status));
982 } 982 }
983 983
984 /* 984 /*
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index bd3825d38abc..5d0a704fa039 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -39,40 +39,6 @@
39#include "rds.h" 39#include "rds.h"
40#include "ib.h" 40#include "ib.h"
41 41
42static char *rds_ib_wc_status_strings[] = {
43#define RDS_IB_WC_STATUS_STR(foo) \
44 [IB_WC_##foo] = __stringify(IB_WC_##foo)
45 RDS_IB_WC_STATUS_STR(SUCCESS),
46 RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
47 RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
48 RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
49 RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
50 RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
51 RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
52 RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
53 RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
54 RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
55 RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
56 RDS_IB_WC_STATUS_STR(REM_OP_ERR),
57 RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
58 RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
59 RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
60 RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
61 RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
62 RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
63 RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
64 RDS_IB_WC_STATUS_STR(FATAL_ERR),
65 RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
66 RDS_IB_WC_STATUS_STR(GENERAL_ERR),
67#undef RDS_IB_WC_STATUS_STR
68};
69
70char *rds_ib_wc_status_str(enum ib_wc_status status)
71{
72 return rds_str_array(rds_ib_wc_status_strings,
73 ARRAY_SIZE(rds_ib_wc_status_strings), status);
74}
75
76/* 42/*
77 * Convert IB-specific error message to RDS error message and call core 43 * Convert IB-specific error message to RDS error message and call core
78 * completion handler. 44 * completion handler.
@@ -293,7 +259,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
293 while (ib_poll_cq(cq, 1, &wc) > 0) { 259 while (ib_poll_cq(cq, 1, &wc) > 0) {
294 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", 260 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
295 (unsigned long long)wc.wr_id, wc.status, 261 (unsigned long long)wc.wr_id, wc.status,
296 rds_ib_wc_status_str(wc.status), wc.byte_len, 262 ib_wc_status_msg(wc.status), wc.byte_len,
297 be32_to_cpu(wc.ex.imm_data)); 263 be32_to_cpu(wc.ex.imm_data));
298 rds_ib_stats_inc(s_ib_tx_cq_event); 264 rds_ib_stats_inc(s_ib_tx_cq_event);
299 265
@@ -344,7 +310,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
344 rds_ib_conn_error(conn, "send completion on %pI4 had status " 310 rds_ib_conn_error(conn, "send completion on %pI4 had status "
345 "%u (%s), disconnecting and reconnecting\n", 311 "%u (%s), disconnecting and reconnecting\n",
346 &conn->c_faddr, wc.status, 312 &conn->c_faddr, wc.status,
347 rds_ib_wc_status_str(wc.status)); 313 ib_wc_status_msg(wc.status));
348 } 314 }
349 } 315 }
350} 316}
@@ -605,6 +571,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
605 } 571 }
606 572
607 rds_message_addref(rm); 573 rds_message_addref(rm);
574 rm->data.op_dmasg = 0;
575 rm->data.op_dmaoff = 0;
608 ic->i_data_op = &rm->data; 576 ic->i_data_op = &rm->data;
609 577
610 /* Finalize the header */ 578 /* Finalize the header */
@@ -658,7 +626,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
658 send = &ic->i_sends[pos]; 626 send = &ic->i_sends[pos];
659 first = send; 627 first = send;
660 prev = NULL; 628 prev = NULL;
661 scat = &ic->i_data_op->op_sg[sg]; 629 scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
662 i = 0; 630 i = 0;
663 do { 631 do {
664 unsigned int len = 0; 632 unsigned int len = 0;
@@ -680,17 +648,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
680 /* Set up the data, if present */ 648 /* Set up the data, if present */
681 if (i < work_alloc 649 if (i < work_alloc
682 && scat != &rm->data.op_sg[rm->data.op_count]) { 650 && scat != &rm->data.op_sg[rm->data.op_count]) {
683 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); 651 len = min(RDS_FRAG_SIZE,
652 ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
684 send->s_wr.num_sge = 2; 653 send->s_wr.num_sge = 2;
685 654
686 send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off; 655 send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
656 send->s_sge[1].addr += rm->data.op_dmaoff;
687 send->s_sge[1].length = len; 657 send->s_sge[1].length = len;
688 658
689 bytes_sent += len; 659 bytes_sent += len;
690 off += len; 660 rm->data.op_dmaoff += len;
691 if (off == ib_sg_dma_len(dev, scat)) { 661 if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
692 scat++; 662 scat++;
693 off = 0; 663 rm->data.op_dmasg++;
664 rm->data.op_dmaoff = 0;
694 } 665 }
695 } 666 }
696 667
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6c2bea9f8f9..8f486fa32079 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -179,6 +179,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
179 void *context) 179 void *context)
180{ 180{
181 struct ib_device *dev = rds_iwdev->dev; 181 struct ib_device *dev = rds_iwdev->dev;
182 struct ib_cq_init_attr cq_attr = {};
182 unsigned int send_size, recv_size; 183 unsigned int send_size, recv_size;
183 int ret; 184 int ret;
184 185
@@ -198,9 +199,10 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
198 attr->sq_sig_type = IB_SIGNAL_REQ_WR; 199 attr->sq_sig_type = IB_SIGNAL_REQ_WR;
199 attr->qp_type = IB_QPT_RC; 200 attr->qp_type = IB_QPT_RC;
200 201
202 cq_attr.cqe = send_size;
201 attr->send_cq = ib_create_cq(dev, send_cq_handler, 203 attr->send_cq = ib_create_cq(dev, send_cq_handler,
202 rds_iw_cq_event_handler, 204 rds_iw_cq_event_handler,
203 context, send_size, 0); 205 context, &cq_attr);
204 if (IS_ERR(attr->send_cq)) { 206 if (IS_ERR(attr->send_cq)) {
205 ret = PTR_ERR(attr->send_cq); 207 ret = PTR_ERR(attr->send_cq);
206 attr->send_cq = NULL; 208 attr->send_cq = NULL;
@@ -208,9 +210,10 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
208 goto out; 210 goto out;
209 } 211 }
210 212
213 cq_attr.cqe = recv_size;
211 attr->recv_cq = ib_create_cq(dev, recv_cq_handler, 214 attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
212 rds_iw_cq_event_handler, 215 rds_iw_cq_event_handler,
213 context, recv_size, 0); 216 context, &cq_attr);
214 if (IS_ERR(attr->recv_cq)) { 217 if (IS_ERR(attr->recv_cq)) {
215 ret = PTR_ERR(attr->recv_cq); 218 ret = PTR_ERR(attr->recv_cq);
216 attr->recv_cq = NULL; 219 attr->recv_cq = NULL;
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 13834780a308..334fe98c5084 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -581,6 +581,8 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 		ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
 		ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
 		rds_message_addref(rm);
+		rm->data.op_dmasg = 0;
+		rm->data.op_dmaoff = 0;
 		ic->i_rm = rm;
 
 		/* Finalize the header */
@@ -622,7 +624,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	send = &ic->i_sends[pos];
 	first = send;
 	prev = NULL;
-	scat = &rm->data.op_sg[sg];
+	scat = &rm->data.op_sg[rm->data.op_dmasg];
 	sent = 0;
 	i = 0;
 
@@ -656,10 +658,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 
 		send = &ic->i_sends[pos];
 
-		len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
+		len = min(RDS_FRAG_SIZE,
+			  ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
 		rds_iw_xmit_populate_wr(ic, send, pos,
-			ib_sg_dma_address(dev, scat) + off, len,
+			ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len,
 			send_flags);
 
 		/*
 		 * We want to delay signaling completions just enough to get
@@ -687,10 +690,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
 
 		sent += len;
-		off += len;
-		if (off == ib_sg_dma_len(dev, scat)) {
+		rm->data.op_dmaoff += len;
+		if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
 			scat++;
-			off = 0;
+			rm->data.op_dmaoff = 0;
+			rm->data.op_dmasg++;
 		}
 
 add_header:
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 6cd9d1deafc3..208240836043 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -37,34 +37,6 @@
 
 static struct rdma_cm_id *rds_rdma_listen_id;
 
-static char *rds_cm_event_strings[] = {
-#define RDS_CM_EVENT_STRING(foo) \
-	[RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
-	RDS_CM_EVENT_STRING(ADDR_RESOLVED),
-	RDS_CM_EVENT_STRING(ADDR_ERROR),
-	RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
-	RDS_CM_EVENT_STRING(ROUTE_ERROR),
-	RDS_CM_EVENT_STRING(CONNECT_REQUEST),
-	RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
-	RDS_CM_EVENT_STRING(CONNECT_ERROR),
-	RDS_CM_EVENT_STRING(UNREACHABLE),
-	RDS_CM_EVENT_STRING(REJECTED),
-	RDS_CM_EVENT_STRING(ESTABLISHED),
-	RDS_CM_EVENT_STRING(DISCONNECTED),
-	RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
-	RDS_CM_EVENT_STRING(MULTICAST_JOIN),
-	RDS_CM_EVENT_STRING(MULTICAST_ERROR),
-	RDS_CM_EVENT_STRING(ADDR_CHANGE),
-	RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
-#undef RDS_CM_EVENT_STRING
-};
-
-static char *rds_cm_event_str(enum rdma_cm_event_type type)
-{
-	return rds_str_array(rds_cm_event_strings,
-			     ARRAY_SIZE(rds_cm_event_strings), type);
-};
-
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event)
 {
@@ -74,7 +46,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	int ret = 0;
 
 	rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
-		 event->event, rds_cm_event_str(event->event));
+		 event->event, rdma_event_msg(event->event));
 
 	if (cm_id->device->node_type == RDMA_NODE_RNIC)
 		trans = &rds_iw_transport;
@@ -139,7 +111,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	default:
 		/* things like device disconnect? */
 		printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
-		       event->event, rds_cm_event_str(event->event));
+		       event->event, rdma_event_msg(event->event));
 		break;
 	}
 
@@ -148,7 +120,7 @@ out:
 	mutex_unlock(&conn->c_cm_lock);
 
 	rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
-		 rds_cm_event_str(event->event), ret);
+		 rdma_event_msg(event->event), ret);
 
 	return ret;
 }
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d41155a2258..2260c1e434b1 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -363,6 +363,8 @@ struct rds_message {
 			unsigned int		op_active:1;
 			unsigned int		op_nents;
 			unsigned int		op_count;
+			unsigned int		op_dmasg;
+			unsigned int		op_dmaoff;
 			struct scatterlist	*op_sg;
 		} data;
 	};
@@ -408,11 +410,6 @@ struct rds_notifier {
  * should try hard not to block.
  */
 
-#define RDS_TRANS_IB	0
-#define RDS_TRANS_IWARP	1
-#define RDS_TRANS_TCP	2
-#define RDS_TRANS_COUNT	3
-
 struct rds_transport {
 	char			t_name[TRANSNAMSIZ];
 	struct list_head	t_item;
@@ -575,7 +572,6 @@ struct rds_statistics {
 };
 
 /* af_rds.c */
-char *rds_str_array(char **array, size_t elements, size_t index);
 void rds_sock_addref(struct rds_sock *rs);
 void rds_sock_put(struct rds_sock *rs);
 void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -803,6 +799,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr);
 void rds_trans_put(struct rds_transport *trans);
 unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
 				       unsigned int avail);
+struct rds_transport *rds_trans_get(int t_type);
 int rds_trans_init(void);
 void rds_trans_exit(void);
 
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4fec367..83498e1c75b8 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister);
 
 void rds_trans_put(struct rds_transport *trans)
 {
-	if (trans && trans->t_owner)
+	if (trans)
 		module_put(trans->t_owner);
 }
 
@@ -101,6 +101,27 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
 	return ret;
 }
 
+struct rds_transport *rds_trans_get(int t_type)
+{
+	struct rds_transport *ret = NULL;
+	struct rds_transport *trans;
+	unsigned int i;
+
+	down_read(&rds_trans_sem);
+	for (i = 0; i < RDS_TRANS_COUNT; i++) {
+		trans = transports[i];
+
+		if (trans && trans->t_type == t_type &&
+		    (!trans->t_owner || try_module_get(trans->t_owner))) {
+			ret = trans;
+			break;
+		}
+	}
+	up_read(&rds_trans_sem);
+
+	return ret;
+}
+
 /*
  * This returns the number of stats entries in the snapshot and only
  * copies them using the iter if there is enough space for them.  The
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index fa7cd792791c..f12149a29cb1 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -794,7 +794,8 @@ void rfkill_resume_polling(struct rfkill *rfkill)
 }
 EXPORT_SYMBOL(rfkill_resume_polling);
 
-static int rfkill_suspend(struct device *dev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int rfkill_suspend(struct device *dev)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
@@ -818,13 +819,18 @@ static int rfkill_resume(struct device *dev)
 	return 0;
 }
 
+static SIMPLE_DEV_PM_OPS(rfkill_pm_ops, rfkill_suspend, rfkill_resume);
+#define RFKILL_PM_OPS (&rfkill_pm_ops)
+#else
+#define RFKILL_PM_OPS NULL
+#endif
+
 static struct class rfkill_class = {
 	.name		= "rfkill",
 	.dev_release	= rfkill_release,
 	.dev_groups	= rfkill_dev_groups,
 	.dev_uevent	= rfkill_dev_uevent,
-	.suspend	= rfkill_suspend,
-	.resume		= rfkill_resume,
+	.pm		= RFKILL_PM_OPS,
 };
 
 bool rfkill_blocked(struct rfkill *rfkill)
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index d978f2f46ff3..d5d58d919552 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -112,21 +112,17 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 
 	rfkill->clk = devm_clk_get(&pdev->dev, NULL);
 
-	gpio = devm_gpiod_get(&pdev->dev, "reset");
-	if (!IS_ERR(gpio)) {
-		ret = gpiod_direction_output(gpio, 0);
-		if (ret)
-			return ret;
-		rfkill->reset_gpio = gpio;
-	}
+	gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(gpio))
+		return PTR_ERR(gpio);
 
-	gpio = devm_gpiod_get(&pdev->dev, "shutdown");
-	if (!IS_ERR(gpio)) {
-		ret = gpiod_direction_output(gpio, 0);
-		if (ret)
-			return ret;
-		rfkill->shutdown_gpio = gpio;
-	}
+	rfkill->reset_gpio = gpio;
+
+	gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW);
+	if (IS_ERR(gpio))
+		return PTR_ERR(gpio);
+
+	rfkill->shutdown_gpio = gpio;
 
 	/* Make sure at-least one of the GPIO is defined and that
 	 * a name is specified for this instance
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8ae603069a1a..129d357d2722 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -192,7 +192,8 @@ static void rose_kill_by_device(struct net_device *dev)
 
 		if (rose->device == dev) {
 			rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
-			rose->neighbour->use--;
+			if (rose->neighbour)
+				rose->neighbour->use--;
 			rose->device = NULL;
 		}
 	}
@@ -520,7 +521,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
 	if (sock->type != SOCK_SEQPACKET || protocol != 0)
 		return -ESOCKTNOSUPPORT;
 
-	sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto);
+	sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern);
 	if (sk == NULL)
 		return -ENOMEM;
 
@@ -559,7 +560,7 @@ static struct sock *rose_make_new(struct sock *osk)
 	if (osk->sk_type != SOCK_SEQPACKET)
 		return NULL;
 
-	sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto);
+	sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0);
 	if (sk == NULL)
 		return NULL;
 
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index e873d7d9f857..c76638cc2cd5 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -25,7 +25,6 @@
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
-#include <linux/netfilter.h>
 #include <net/rose.h>
 
 static void rose_ftimer_expiry(unsigned long);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 40148932c8a4..0fc76d845103 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -31,7 +31,6 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
-#include <linux/netfilter.h>
 #include <linux/init.h>
 #include <net/rose.h>
 #include <linux/seq_file.h>
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0095b9a0b779..25d60ed15284 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
 	sock->ops = &rxrpc_rpc_ops;
 	sock->state = SS_UNCONNECTED;
 
-	sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto);
+	sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index ca904ed5400a..78483b4602bf 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local)
 	_enter("%p{%d}", local, local->srx.transport_type);
 
 	/* create a socket to represent the local endpoint */
-	ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
-			       &local->socket);
+	ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
+			       IPPROTO_UDP, &local->socket);
 	if (ret < 0) {
 		_leave(" = %d [socket]", ret);
 		return ret;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a3df..daa33432b716 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -312,6 +312,7 @@ config NET_SCH_PIE
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
+	select NET_INGRESS
 	---help---
 	  Say Y here if you want to use classifiers for incoming packets.
 	  If unsure, say Y.
@@ -477,6 +478,16 @@ config NET_CLS_BPF
 	  To compile this code as a module, choose M here: the module will
 	  be called cls_bpf.
 
+config NET_CLS_FLOWER
+	tristate "Flower classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  a configurable combination of packet keys and masks.
+
+	  To compile this code as a module, choose M here: the module will
+	  be called cls_flower.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b8c2..690c1689e090 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_CLS_BPF)	+= cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER)	+= cls_flower.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3d43e4979f27..af427a3dbcba 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
 	list_for_each_entry(a, actions, list) {
 repeat:
 		ret = a->ops->act(skb, a, res);
-		if (TC_MUNGED & skb->tc_verd) {
-			/* copied already, allow trampling */
-			skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
-			skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
-		}
 		if (ret == TC_ACT_REPEAT)
 			goto repeat;	/* we need a ttl - JHS */
 		if (ret != TC_ACT_PIPE)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index dc6a2d324bd8..1d56903fd4c7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -37,6 +37,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 {
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
+	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
 		return TC_ACT_UNSPEC;
@@ -48,7 +49,13 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
-	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	if (at_ingress) {
+		__skb_push(skb, skb->mac_len);
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+		__skb_pull(skb, skb->mac_len);
+	} else {
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+	}
 	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3f63ceac8e01..a42a3b257226 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	at = G_TC_AT(skb->tc_verd);
-	skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
+	skb2 = skb_clone(skb, GFP_ATOMIC);
 	if (skb2 == NULL)
 		goto out;
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 59649d588d79..17e6d6669c7f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 		     struct tcf_result *res)
 {
 	struct tcf_pedit *p = a->priv;
-	int i, munged = 0;
+	int i;
 	unsigned int off;
 
 	if (skb_unclone(skb, GFP_ATOMIC))
@@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 			*ptr = ((*ptr & tkey->mask) ^ tkey->val);
 			if (ptr == &_data)
 				skb_store_bits(skb, off + offset, ptr, 4);
-			munged++;
 		}
 
-		if (munged)
-			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
 		goto done;
 	} else
 		WARN(1, "pedit BUG: index %d\n", p->tcf_index);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 91bd9c19471d..c79ecfd36e0f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -64,6 +64,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 {
 	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
 	struct cls_bpf_prog *prog;
+#ifdef CONFIG_NET_CLS_ACT
+	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
+#else
+	bool at_ingress = false;
+#endif
 	int ret = -1;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +77,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
-		int filter_res = BPF_PROG_RUN(prog->filter, skb);
+		int filter_res;
+
+		if (at_ingress) {
+			/* It is safe to push/pull even if skb_shared() */
+			__skb_push(skb, skb->mac_len);
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+			__skb_pull(skb, skb->mac_len);
+		} else {
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+		}
 
 		if (filter_res == 0)
 			continue;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a620c4e288a5..76bc3a20ffdb 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,7 +26,7 @@
 #include <net/pkt_cls.h>
 #include <net/ip.h>
 #include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
@@ -68,35 +68,41 @@ static inline u32 addr_fold(void *addr)
 
 static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->src)
-		return ntohl(flow->src);
+	__be32 src = flow_get_u32_src(flow);
+
+	if (src)
+		return ntohl(src);
+
 	return addr_fold(skb->sk);
 }
 
 static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->dst)
-		return ntohl(flow->dst);
+	__be32 dst = flow_get_u32_dst(flow);
+
+	if (dst)
+		return ntohl(dst);
+
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
 
 static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	return flow->ip_proto;
+	return flow->basic.ip_proto;
 }
 
 static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->ports)
-		return ntohs(flow->port16[0]);
+	if (flow->ports.ports)
+		return ntohs(flow->ports.src);
 
 	return addr_fold(skb->sk);
 }
 
 static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->ports)
-		return ntohs(flow->port16[1]);
+	if (flow->ports.ports)
+		return ntohs(flow->ports.dst);
 
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
@@ -295,7 +301,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 	keymask = f->keymask;
 	if (keymask & FLOW_KEYS_NEEDED)
-		skb_flow_dissect(skb, &flow_keys);
+		skb_flow_dissect_flow_keys(skb, &flow_keys);
 
 	for (n = 0; n < f->nkeys; n++) {
 		key = ffs(keymask) - 1;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000000..9d37ccd95062
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,691 @@
+/*
+ * net/sched/cls_flower.c		Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+	int	indev_ifindex;
+	struct flow_dissector_key_control control;
+	struct flow_dissector_key_basic basic;
+	struct flow_dissector_key_eth_addrs eth;
+	struct flow_dissector_key_addrs ipaddrs;
+	union {
+		struct flow_dissector_key_ipv4_addrs ipv4;
+		struct flow_dissector_key_ipv6_addrs ipv6;
+	};
+	struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+	unsigned short int start;
+	unsigned short int end;
+};
+
+struct fl_flow_mask {
+	struct fl_flow_key key;
+	struct fl_flow_mask_range range;
+	struct rcu_head	rcu;
+};
+
+struct cls_fl_head {
+	struct rhashtable ht;
+	struct fl_flow_mask mask;
+	struct flow_dissector dissector;
+	u32 hgen;
+	bool mask_assigned;
+	struct list_head filters;
+	struct rhashtable_params ht_params;
+	struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+	struct rhash_head ht_node;
+	struct fl_flow_key mkey;
+	struct tcf_exts exts;
+	struct tcf_result res;
+	struct fl_flow_key key;
+	struct list_head list;
+	u32 handle;
+	struct rcu_head	rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+	return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+	const u8 *bytes = (const u8 *) &mask->key;
+	size_t size = sizeof(mask->key);
+	size_t i, first = 0, last = size - 1;
+
+	for (i = 0; i < sizeof(mask->key); i++) {
+		if (bytes[i]) {
+			if (!first && i)
+				first = i;
+			last = i;
+		}
+	}
+	mask->range.start = rounddown(first, sizeof(long));
+	mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+			      const struct fl_flow_mask *mask)
+{
+	return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+			      struct fl_flow_mask *mask)
+{
+	const long *lkey = fl_key_get_start(key, mask);
+	const long *lmask = fl_key_get_start(&mask->key, mask);
+	long *lmkey = fl_key_get_start(mkey, mask);
+	int i;
+
+	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+		*lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+				  struct fl_flow_mask *mask)
+{
+	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		       struct tcf_result *res)
+{
+	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+	struct cls_fl_filter *f;
+	struct fl_flow_key skb_key;
+	struct fl_flow_key skb_mkey;
+
+	fl_clear_masked_range(&skb_key, &head->mask);
+	skb_key.indev_ifindex = skb->skb_iif;
+	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+	 * so do it rather here.
+	 */
+	skb_key.basic.n_proto = skb->protocol;
+	skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+	f = rhashtable_lookup_fast(&head->ht,
+				   fl_key_get_start(&skb_mkey, &head->mask),
+				   head->ht_params);
+	if (f) {
+		*res = f->res;
+		return tcf_exts_exec(skb, &f->exts, res);
+	}
+	return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+	struct cls_fl_head *head;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (!head)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD_RCU(&head->filters);
+	rcu_assign_pointer(tp->root, head);
+
+	return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+	tcf_exts_destroy(&f->exts);
+	kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f, *next;
+
+	if (!force && !list_empty(&head->filters))
+		return false;
+
+	list_for_each_entry_safe(f, next, &head->filters, list) {
+		list_del_rcu(&f->list);
+		call_rcu(&f->rcu, fl_destroy_filter);
+	}
+	RCU_INIT_POINTER(tp->root, NULL);
+	if (head->mask_assigned)
+		rhashtable_destroy(&head->ht);
+	kfree_rcu(head, rcu);
+	return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry(f, &head->filters, list)
+		if (f->handle == handle)
+			return (unsigned long) f;
+	return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
+	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
+	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
+					    .len = IFNAMSIZ },
+	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	if (!tb[val_type])
+		return;
+	memcpy(val, nla_data(tb[val_type]), len);
+	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+		memset(mask, 0xff, len);
+	else
+		memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+		      struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+#ifdef CONFIG_NET_CLS_IND
+	if (tb[TCA_FLOWER_INDEV]) {
+		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+		if (err < 0)
+			return err;
+		key->indev_ifindex = err;
+		mask->indev_ifindex = 0xffffffff;
+	}
+#endif
+
+	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+		       sizeof(key->eth.dst));
+	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+		       sizeof(key->eth.src));
+	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+		       sizeof(key->basic.n_proto));
+	if (key->basic.n_proto == htons(ETH_P_IP) ||
+	    key->basic.n_proto == htons(ETH_P_IPV6)) {
+		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			       sizeof(key->basic.ip_proto));
+	}
+	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			       sizeof(key->ipv4.src));
+		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			       sizeof(key->ipv4.dst));
+	} else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+			       sizeof(key->ipv6.src));
+		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+			       sizeof(key->ipv6.dst));
+	}
+	if (key->basic.ip_proto == IPPROTO_TCP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	} else if (key->basic.ip_proto == IPPROTO_UDP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	}
+
+	return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+		       struct fl_flow_mask *mask2)
+{
+	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+	.head_offset = offsetof(struct cls_fl_filter, ht_node),
+	.automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+			     struct fl_flow_mask *mask)
+{
+	head->ht_params = fl_ht_params;
+	head->ht_params.key_len = fl_mask_range(mask);
+	head->ht_params.key_offset += mask->range.start;
+
+	return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member)					\
+	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member)						\
+	(FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
+	 FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member)					\
+	do {									\
+		keys[cnt].key_id = id;						\
+		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
+		cnt++;								\
+	} while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
+	do {									\
+		if (FL_KEY_IN_RANGE(mask, member))				\
+			FL_KEY_SET(keys, cnt, id, member);			\
+	} while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+			      struct fl_flow_mask *mask)
+{
+	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+	size_t cnt = 0;
+
+	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
+	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_PORTS, tp);
+
+	skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+				struct fl_flow_mask *mask)
+{
+	int err;
+
+	if (head->mask_assigned) {
+		if (!fl_mask_eq(&head->mask, mask))
+			return -EINVAL;
+		else
+			return 0;
+	}
+
+	/* Mask is not assigned yet. So assign it and init hashtable
+	 * according to that.
+	 */
+	err = fl_init_hashtable(head, mask);
+	if (err)
+		return err;
+	memcpy(&head->mask, mask, sizeof(head->mask));
+	head->mask_assigned = true;
+
+	fl_init_dissector(head, mask);
+
+	return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+			struct cls_fl_filter *f, struct fl_flow_mask *mask,
+			unsigned long base, struct nlattr **tb,
+			struct nlattr *est, bool ovr)
+{
+	struct tcf_exts e;
+	int err;
+
+	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_FLOWER_CLASSID]) {
+		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	err = fl_set_key(net, tb, &f->key, &mask->key);
+	if (err)
+		goto errout;
+
+	fl_mask_update_range(mask);
+	fl_set_masked_key(&f->mkey, &f->key, mask);
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(&e);
+	return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+			      struct cls_fl_head *head)
+{
+	unsigned int i = 0x80000000;
+	u32 handle;
+
+	do {
+		if (++head->hgen == 0x7FFFFFFF)
+			head->hgen = 1;
+	} while (--i > 0 && fl_get(tp, head->hgen));
+
+	if (unlikely(i == 0)) {
+		pr_err("Insufficient number of handles\n");
+		handle = 0;
+	} else {
+		handle = head->hgen;
+	}
+
+	return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+		     struct tcf_proto *tp, unsigned long base,
+		     u32 handle, struct nlattr **tca,
+		     unsigned long *arg, bool ovr)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+	struct cls_fl_filter *fnew;
+	struct nlattr *tb[TCA_FLOWER_MAX + 1];
+	struct fl_flow_mask mask = {};
+	int err;
+
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+	if (err < 0)
+		return err;
+
+	if (fold && handle && fold->handle != handle)
+		return -EINVAL;
+
+	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+	if (!fnew)
+		return -ENOBUFS;
+
+	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+	if (!handle) {
+		handle = fl_grab_new_handle(tp, head);
+		if (!handle) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+	fnew->handle = handle;
+
+	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+	if (err)
+		goto errout;
+
+	err = fl_check_assign_mask(head, &mask);
+	if (err)
+		goto errout;
+
+	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+				     head->ht_params);
+	if (err)
+		goto errout;
+	if (fold)
+		rhashtable_remove_fast(&head->ht, &fold->ht_node,
+				       head->ht_params);
+
+	*arg = (unsigned long) fnew;
+
+	if (fold) {
+		list_replace_rcu(&fnew->list, &fold->list);
+		tcf_unbind_filter(tp, &fold->res);
+		call_rcu(&fold->rcu, fl_destroy_filter);
+	} else {
+		list_add_tail_rcu(&fnew->list, &head->filters);
+	}
+
+	return 0;
+
+errout:
+	kfree(fnew);
+	return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+	rhashtable_remove_fast(&head->ht, &f->ht_node,
+			       head->ht_params);
+	list_del_rcu(&f->list);
+	tcf_unbind_filter(tp, &f->res);
+	call_rcu(&f->rcu, fl_destroy_filter);
+	return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry_rcu(f, &head->filters, list) {
+		if (arg->count < arg->skip)
+			goto skip;
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	int err;
+
+	if (!memchr_inv(mask, 0, len))
+		return 0;
+	err = nla_put(skb, val_type, len, val);
+	if (err)
+		return err;
+	if (mask_type != TCA_FLOWER_UNSPEC) {
+		err = nla_put(skb, mask_type, len, mask);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+	struct nlattr *nest;
+	struct fl_flow_key *key, *mask;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (f->res.classid &&
+	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+		goto nla_put_failure;
+
+	key = &f->key;
+	mask = &head->mask.key;
+
+	if (mask->indev_ifindex) {
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(net, key->indev_ifindex);
+		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+			goto nla_put_failure;
+	}
+
+	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+			    sizeof(key->eth.dst)) ||
+	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+			    sizeof(key->eth.src)) ||
+	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.n_proto)))
+		goto nla_put_failure;
+	if ((key->basic.n_proto == htons(ETH_P_IP) ||
+	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
+	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.ip_proto)))
+		goto nla_put_failure;
+
+	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			     sizeof(key->ipv4.src)) ||
+	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			     sizeof(key->ipv4.dst))))
+		goto nla_put_failure;
+	else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+				  sizeof(key->ipv6.src)) ||
+		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+				  sizeof(key->ipv6.dst))))
+		goto nla_put_failure;
+
+	if (key->basic.ip_proto == IPPROTO_TCP &&
+	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			     &mask->tp.src, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.src)) ||
+	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.dst))))
+		goto nla_put_failure;
+	else if (key->basic.ip_proto == IPPROTO_UDP &&
+		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+				  &mask->tp.src, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.src)) ||
+		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.dst))))
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &f->exts))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+	.kind		= "flower",
+	.classify	= fl_classify,
+	.init		= fl_init,
+	.destroy	= fl_destroy,
+	.get		= fl_get,
+	.change		= fl_change,
+	.delete		= fl_delete,
+	.walk		= fl_walk,
+	.dump		= fl_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+	return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+	unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index a3d79c8bf3b8..df0328ba6a48 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -92,8 +92,8 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
 
 	rcu_read_lock();
 
-	if (dev && skb->skb_iif)
-		indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif);
+	if (skb->skb_iif)
+		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
 
 	acpar.in      = indev ? indev : dev;
 	acpar.out     = dev;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1e1c89e51a11..f06aa01d60fd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1818,13 +1818,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
 			continue;
 		err = tp->classify(skb, tp, res);
 
-		if (err >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
-			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
-				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
-#endif
+		if (err >= 0)
 			return err;
-		}
 	}
 	return -1;
 }
@@ -1836,23 +1831,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	int err = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	const struct tcf_proto *otp = tp;
+	int limit = 0;
 reclassify:
 #endif
 
 	err = tc_classify_compat(skb, tp, res);
 #ifdef CONFIG_NET_CLS_ACT
 	if (err == TC_ACT_RECLASSIFY) {
-		u32 verd = G_TC_VERD(skb->tc_verd);
 		tp = otp;
 
-		if (verd++ >= MAX_REC_LOOP) {
+		if (unlikely(limit++ >= MAX_REC_LOOP)) {
 			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
 					       tp->q->ops->id,
 					       tp->prio & 0xffff,
 					       ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
-		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
 		goto reclassify;
 	}
 #endif
@@ -1885,13 +1879,10 @@ EXPORT_SYMBOL(tcf_destroy_chain);
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
-	struct timespec ts;
-
-	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
 	seq_printf(seq, "%08x %08x %08x %08x\n",
 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
 		   1000000,
-		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
+		   (u32)NSEC_PER_SEC / hrtimer_resolution);
 
 	return 0;
 }
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9045ce..93d5742dc7e0 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 /*
    CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
 	--sch->q.qlen;
 }
 
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
 struct choke_skb_cb {
 	u16			classid;
 	u8			keys_valid;
-	u8			keys[QDISC_CB_PRIV_LEN - 3];
+	struct flow_keys_digest	keys;
 };
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -176,19 +170,19 @@ static bool choke_match_flow(struct sk_buff *skb1,
 
 	if (!choke_skb_cb(skb1)->keys_valid) {
 		choke_skb_cb(skb1)->keys_valid = 1;
-		skb_flow_dissect(skb1, &temp);
-		memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
+		skb_flow_dissect_flow_keys(skb1, &temp);
+		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
 	}
 
 	if (!choke_skb_cb(skb2)->keys_valid) {
 		choke_skb_cb(skb2)->keys_valid = 1;
-		skb_flow_dissect(skb2, &temp);
-		memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
+		skb_flow_dissect_flow_keys(skb2, &temp);
+		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
 	}
 
 	return !memcmp(&choke_skb_cb(skb1)->keys,
 		       &choke_skb_cb(skb2)->keys,
-		       CHOKE_K_LEN);
+		       sizeof(choke_skb_cb(skb1)->keys));
 }
 
 /*
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 7a0bdb16ac92..535007d5f0b5 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -6,7 +6,7 @@
  *
  *  Implemented on linux by :
  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
 	[TCA_CODEL_LIMIT]	= { .type = NLA_U32 },
 	[TCA_CODEL_INTERVAL]	= { .type = NLA_U32 },
 	[TCA_CODEL_ECN]		= { .type = NLA_U32 },
+	[TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
 };
 
 static int codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
 		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
 
+	if (tb[TCA_CODEL_CE_THRESHOLD]) {
+		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+
+		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
 	if (tb[TCA_CODEL_INTERVAL]) {
 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
 
@@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_CODEL_ECN,
 			q->params.ecn))
 		goto nla_put_failure;
-
+	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->params.ce_threshold)))
+		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		.ldelay		= codel_time_to_us(q->vars.ldelay),
 		.dropping	= q->vars.dropping,
 		.ecn_mark	= q->stats.ecn_mark,
+		.ce_mark	= q->stats.ce_mark,
 	};
 
 	if (q->vars.dropping) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c244c45b78d7..d75993f89fac 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -6,7 +6,7 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  *
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  */
 
 #include <linux/module.h>
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/flow_keys.h>
 #include <net/codel.h>
 
 /*	Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
 };
 
 static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
-				  const struct sk_buff *skb)
+				  struct sk_buff *skb)
 {
-	struct flow_keys keys;
-	unsigned int hash;
-
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
+	u32 hash = skb_get_hash_perturb(skb, q->perturbation);
 
 	return reciprocal_scale(hash, q->flows_cnt);
 }
@@ -299,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
 	[TCA_FQ_CODEL_ECN]	= { .type = NLA_U32 },
 	[TCA_FQ_CODEL_FLOWS]	= { .type = NLA_U32 },
 	[TCA_FQ_CODEL_QUANTUM]	= { .type = NLA_U32 },
+	[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
 };
 
 static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -329,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
 		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
 
+	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
+		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+
+		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
 
@@ -448,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 			q->flows_cnt))
 		goto nla_put_failure;
 
+	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->cparams.ce_threshold)))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -466,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.qdisc_stats.drop_overlimit = q->drop_overlimit;
 	st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
 	st.qdisc_stats.new_flow_count = q->new_flow_count;
+	st.qdisc_stats.ce_mark = q->cstats.ce_mark;
 
 	list_for_each(pos, &q->new_flows)
 		st.qdisc_stats.new_flows_len++;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 634529e0ce6b..abb9f2fec28f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
165 * if no default DP has been configured. This 165 * if no default DP has been configured. This
166 * allows for DP flows to be left untouched. 166 * allows for DP flows to be left untouched.
167 */ 167 */
168 if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) 168 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
169 sch->limit))
169 return qdisc_enqueue_tail(skb, sch); 170 return qdisc_enqueue_tail(skb, sch);
170 else 171 else
171 goto drop; 172 goto drop;
@@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
397 398
398 q->DP = dp; 399 q->DP = dp;
399 q->prio = prio; 400 q->prio = prio;
400 q->limit = ctl->limit; 401 if (ctl->limit > sch->limit)
402 q->limit = sch->limit;
403 else
404 q->limit = ctl->limit;
401 405
402 if (q->backlog == 0) 406 if (q->backlog == 0)
403 red_end_of_idle_period(&q->vars); 407 red_end_of_idle_period(&q->vars);
@@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
414 [TCA_GRED_STAB] = { .len = 256 }, 418 [TCA_GRED_STAB] = { .len = 256 },
415 [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, 419 [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
416 [TCA_GRED_MAX_P] = { .type = NLA_U32 }, 420 [TCA_GRED_MAX_P] = { .type = NLA_U32 },
421 [TCA_GRED_LIMIT] = { .type = NLA_U32 },
417}; 422};
418 423
419static int gred_change(struct Qdisc *sch, struct nlattr *opt) 424static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
433 if (err < 0) 438 if (err < 0)
434 return err; 439 return err;
435 440
436 if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) 441 if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
442 if (tb[TCA_GRED_LIMIT] != NULL)
443 sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
437 return gred_change_table_def(sch, opt); 444 return gred_change_table_def(sch, opt);
445 }
438 446
439 if (tb[TCA_GRED_PARMS] == NULL || 447 if (tb[TCA_GRED_PARMS] == NULL ||
440 tb[TCA_GRED_STAB] == NULL) 448 tb[TCA_GRED_STAB] == NULL ||
449 tb[TCA_GRED_LIMIT] != NULL)
441 return -EINVAL; 450 return -EINVAL;
442 451
443 max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; 452 max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
@@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
501 if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) 510 if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
502 return -EINVAL; 511 return -EINVAL;
503 512
513 if (tb[TCA_GRED_LIMIT])
514 sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
515 else {
516 u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
517
518 sch->limit = qlen * psched_mtu(qdisc_dev(sch));
519 }
520
504 return gred_change_table_def(sch, tb[TCA_GRED_DPS]); 521 return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
505} 522}
506 523
@@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
531 if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) 548 if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
532 goto nla_put_failure; 549 goto nla_put_failure;
533 550
551 if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
552 goto nla_put_failure;
553
534 parms = nla_nest_start(skb, TCA_GRED_PARMS); 554 parms = nla_nest_start(skb, TCA_GRED_PARMS);
535 if (parms == NULL) 555 if (parms == NULL)
536 goto nla_put_failure; 556 goto nla_put_failure;
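
With the gred changes above, gred_init() sizes the default byte limit from the device: tx_queue_len packets (at least one) times psched_mtu(), which is the MTU plus the link-layer header length. A sketch of the arithmetic with assumed Ethernet numbers; the `?:` short-hand is a GNU extension meaning "left operand if non-zero, else the right":

    #include <stdint.h>
    #include <stdio.h>

    struct fake_dev {                   /* assumed stand-in for net_device */
            uint32_t tx_queue_len;
            uint32_t mtu;
            uint32_t hard_header_len;
    };

    /* psched_mtu() from include/net/pkt_sched.h: MTU plus link-layer header */
    static uint32_t psched_mtu(const struct fake_dev *dev)
    {
            return dev->mtu + dev->hard_header_len;
    }

    int main(void)
    {
            struct fake_dev dev = { .tx_queue_len = 1000, .mtu = 1500,
                                    .hard_header_len = 14 };    /* Ethernet */
            uint32_t qlen = dev.tx_queue_len ? : 1;     /* never a zero limit */
            uint32_t limit = qlen * psched_mtu(&dev);

            printf("default GRED limit = %u bytes\n", limit);   /* 1514000 */
            return 0;
    }
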
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aabfe250..9d15cb6b8cb1 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/skbuff.h> 10#include <linux/skbuff.h>
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <net/flow_keys.h>
13#include <net/pkt_sched.h> 12#include <net/pkt_sched.h>
14#include <net/sock.h> 13#include <net/sock.h>
15 14
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
176 return jiffies; 175 return jiffies;
177} 176}
178 177
179static unsigned int skb_hash(const struct hhf_sched_data *q,
180 const struct sk_buff *skb)
181{
182 struct flow_keys keys;
183 unsigned int hash;
184
185 if (skb->sk && skb->sk->sk_hash)
186 return skb->sk->sk_hash;
187
188 skb_flow_dissect(skb, &keys);
189 hash = jhash_3words((__force u32)keys.dst,
190 (__force u32)keys.src ^ keys.ip_proto,
191 (__force u32)keys.ports, q->perturbation);
192 return hash;
193}
194
195/* Looks up a heavy-hitter flow in a chaining list of table T. */ 178/* Looks up a heavy-hitter flow in a chaining list of table T. */
196static struct hh_flow_state *seek_list(const u32 hash, 179static struct hh_flow_state *seek_list(const u32 hash,
197 struct list_head *head, 180 struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
280 } 263 }
281 264
282 /* Get hashed flow-id of the skb. */ 265 /* Get hashed flow-id of the skb. */
283 hash = skb_hash(q, skb); 266 hash = skb_get_hash_perturb(skb, q->perturbation);
284 267
285 /* Check if this packet belongs to an already established HH flow. */ 268 /* Check if this packet belongs to an already established HH flow. */
286 flow_pos = hash & HHF_BIT_MASK; 269 flow_pos = hash & HHF_BIT_MASK;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 4cdbfb85686a..e7c648fa9dc3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -12,16 +12,10 @@
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/skbuff.h> 13#include <linux/skbuff.h>
14#include <linux/rtnetlink.h> 14#include <linux/rtnetlink.h>
15
15#include <net/netlink.h> 16#include <net/netlink.h>
16#include <net/pkt_sched.h> 17#include <net/pkt_sched.h>
17 18
18
19struct ingress_qdisc_data {
20 struct tcf_proto __rcu *filter_list;
21};
22
23/* ------------------------- Class/flow operations ------------------------- */
24
25static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) 19static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
26{ 20{
27 return NULL; 21 return NULL;
@@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
49static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, 43static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
50 unsigned long cl) 44 unsigned long cl)
51{ 45{
52 struct ingress_qdisc_data *p = qdisc_priv(sch); 46 struct net_device *dev = qdisc_dev(sch);
53
54 return &p->filter_list;
55}
56
57/* --------------------------- Qdisc operations ---------------------------- */
58 47
59static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) 48 return &dev->ingress_cl_list;
60{
61 struct ingress_qdisc_data *p = qdisc_priv(sch);
62 struct tcf_result res;
63 struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
64 int result;
65
66 result = tc_classify(skb, fl, &res);
67
68 qdisc_bstats_update(sch, skb);
69 switch (result) {
70 case TC_ACT_SHOT:
71 result = TC_ACT_SHOT;
72 qdisc_qstats_drop(sch);
73 break;
74 case TC_ACT_STOLEN:
75 case TC_ACT_QUEUED:
76 result = TC_ACT_STOLEN;
77 break;
78 case TC_ACT_RECLASSIFY:
79 case TC_ACT_OK:
80 skb->tc_index = TC_H_MIN(res.classid);
81 default:
82 result = TC_ACT_OK;
83 break;
84 }
85
86 return result;
87} 49}
88 50
89/* ------------------------------------------------------------- */
90
91static int ingress_init(struct Qdisc *sch, struct nlattr *opt) 51static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
92{ 52{
93 net_inc_ingress_queue(); 53 net_inc_ingress_queue();
54 sch->flags |= TCQ_F_CPUSTATS;
94 55
95 return 0; 56 return 0;
96} 57}
97 58
98static void ingress_destroy(struct Qdisc *sch) 59static void ingress_destroy(struct Qdisc *sch)
99{ 60{
100 struct ingress_qdisc_data *p = qdisc_priv(sch); 61 struct net_device *dev = qdisc_dev(sch);
101 62
102 tcf_destroy_chain(&p->filter_list); 63 tcf_destroy_chain(&dev->ingress_cl_list);
103 net_dec_ingress_queue(); 64 net_dec_ingress_queue();
104} 65}
105 66
@@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
110 nest = nla_nest_start(skb, TCA_OPTIONS); 71 nest = nla_nest_start(skb, TCA_OPTIONS);
111 if (nest == NULL) 72 if (nest == NULL)
112 goto nla_put_failure; 73 goto nla_put_failure;
74
113 return nla_nest_end(skb, nest); 75 return nla_nest_end(skb, nest);
114 76
115nla_put_failure: 77nla_put_failure:
@@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
130static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { 92static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
131 .cl_ops = &ingress_class_ops, 93 .cl_ops = &ingress_class_ops,
132 .id = "ingress", 94 .id = "ingress",
133 .priv_size = sizeof(struct ingress_qdisc_data),
134 .enqueue = ingress_enqueue,
135 .init = ingress_init, 95 .init = ingress_init,
136 .destroy = ingress_destroy, 96 .destroy = ingress_destroy,
137 .dump = ingress_dump, 97 .dump = ingress_dump,
@@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void)
148 unregister_qdisc(&ingress_qdisc_ops); 108 unregister_qdisc(&ingress_qdisc_ops);
149} 109}
150 110
151module_init(ingress_module_init) 111module_init(ingress_module_init);
152module_exit(ingress_module_exit) 112module_exit(ingress_module_exit);
113
153MODULE_LICENSE("GPL"); 114MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 956ead2cab9a..5abd1d9de989 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
440 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 440 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
441 struct Qdisc *rootq = qdisc_root(sch); 441 struct Qdisc *rootq = qdisc_root(sch);
442 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ 442 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
443 q->duplicate = 0;
444 443
445 qdisc_enqueue_root(skb2, rootq); 444 q->duplicate = 0;
445 rootq->enqueue(skb2, rootq);
446 q->duplicate = dupsave; 446 q->duplicate = dupsave;
447 } 447 }
448 448
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3ec7e88a43ca..b8d73bca683c 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -339,8 +339,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
339 339
340static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) 340static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
341{ 341{
342 if (!hlist_unhashed(&agg->nonfull_next)) 342 hlist_del_init(&agg->nonfull_next);
343 hlist_del_init(&agg->nonfull_next);
344 q->wsum -= agg->class_weight; 343 q->wsum -= agg->class_weight;
345 if (q->wsum != 0) 344 if (q->wsum != 0)
346 q->iwsum = ONE_FP / q->wsum; 345 q->iwsum = ONE_FP / q->wsum;
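
The qfq hunk above can drop the explicit hlist_unhashed() test because hlist_del_init() already performs it internally: on a node that was initialised but never added it is a no-op, and on a hashed node it unlinks and re-initialises it. The helper, from include/linux/list.h:

    static inline void hlist_del_init(struct hlist_node *n)
    {
            if (!hlist_unhashed(n)) {       /* hlist_unhashed(): !n->pprev */
                    __hlist_del(n);
                    INIT_HLIST_NODE(n);
            }
    }
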
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd82630d..4b815193326c 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
26#include <net/ip.h> 26#include <net/ip.h>
27#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
28#include <net/inet_ecn.h> 28#include <net/inet_ecn.h>
29#include <net/flow_keys.h>
30 29
31/* 30/*
32 * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) 31 * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
285 int i; 284 int i;
286 u32 p_min = ~0; 285 u32 p_min = ~0;
287 u32 minqlen = ~0; 286 u32 minqlen = ~0;
288 u32 r, slot, salt, sfbhash; 287 u32 r, sfbhash;
288 u32 slot = q->slot;
289 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 289 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
290 struct flow_keys keys;
291 290
292 if (unlikely(sch->q.qlen >= q->limit)) { 291 if (unlikely(sch->q.qlen >= q->limit)) {
293 qdisc_qstats_overlimit(sch); 292 qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
309 308
310 fl = rcu_dereference_bh(q->filter_list); 309 fl = rcu_dereference_bh(q->filter_list);
311 if (fl) { 310 if (fl) {
311 u32 salt;
312
312 /* If using external classifiers, get result and record it. */ 313 /* If using external classifiers, get result and record it. */
313 if (!sfb_classify(skb, fl, &ret, &salt)) 314 if (!sfb_classify(skb, fl, &ret, &salt))
314 goto other_drop; 315 goto other_drop;
315 keys.src = salt; 316 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
316 keys.dst = 0;
317 keys.ports = 0;
318 } else { 317 } else {
319 skb_flow_dissect(skb, &keys); 318 sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
320 } 319 }
321 320
322 slot = q->slot;
323 321
324 sfbhash = jhash_3words((__force u32)keys.dst,
325 (__force u32)keys.src,
326 (__force u32)keys.ports,
327 q->bins[slot].perturbation);
328 if (!sfbhash) 322 if (!sfbhash)
329 sfbhash = 1; 323 sfbhash = 1;
330 sfb_skb_cb(skb)->hashes[slot] = sfbhash; 324 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
356 if (unlikely(p_min >= SFB_MAX_PROB)) { 350 if (unlikely(p_min >= SFB_MAX_PROB)) {
357 /* Inelastic flow */ 351 /* Inelastic flow */
358 if (q->double_buffering) { 352 if (q->double_buffering) {
359 sfbhash = jhash_3words((__force u32)keys.dst, 353 sfbhash = skb_get_hash_perturb(skb,
360 (__force u32)keys.src, 354 q->bins[slot].perturbation);
361 (__force u32)keys.ports,
362 q->bins[slot].perturbation);
363 if (!sfbhash) 355 if (!sfbhash)
364 sfbhash = 1; 356 sfbhash = 1;
365 sfb_skb_cb(skb)->hashes[slot] = sfbhash; 357 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
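
Both sfb call sites above remap a computed hash of 0 to 1 before caching it in sfb_skb_cb, because 0 doubles as the "no hash recorded" marker. The sentinel pattern in isolation:

    #include <stdint.h>
    #include <stdio.h>

    /* 0 is reserved as the "unset" marker in the per-skb hash cache,
     * so a genuine hash of 0 is folded onto 1 (a 1-in-2^32 bias). */
    static uint32_t nonzero_hash(uint32_t h)
    {
            return h ? h : 1;
    }

    int main(void)
    {
            printf("%u %u\n", nonzero_hash(0), nonzero_hash(0xabcd));
            return 0;
    }
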
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140beda5..7d1492663360 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <net/netlink.h> 24#include <net/netlink.h>
25#include <net/pkt_sched.h> 25#include <net/pkt_sched.h>
26#include <net/flow_keys.h>
27#include <net/red.h> 26#include <net/red.h>
28 27
29 28
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
156 return &q->dep[val - SFQ_MAX_FLOWS]; 155 return &q->dep[val - SFQ_MAX_FLOWS];
157} 156}
158 157
159/*
160 * In order to be able to quickly rehash our queue when timer changes
161 * q->perturbation, we store flow_keys in skb->cb[]
162 */
163struct sfq_skb_cb {
164 struct flow_keys keys;
165};
166
167static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
168{
169 qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
170 return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
171}
172
173static unsigned int sfq_hash(const struct sfq_sched_data *q, 158static unsigned int sfq_hash(const struct sfq_sched_data *q,
174 const struct sk_buff *skb) 159 const struct sk_buff *skb)
175{ 160{
176 const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; 161 return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
177 unsigned int hash;
178
179 hash = jhash_3words((__force u32)keys->dst,
180 (__force u32)keys->src ^ keys->ip_proto,
181 (__force u32)keys->ports, q->perturbation);
182 return hash & (q->divisor - 1);
183} 162}
184 163
185static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, 164static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
196 return TC_H_MIN(skb->priority); 175 return TC_H_MIN(skb->priority);
197 176
198 fl = rcu_dereference_bh(q->filter_list); 177 fl = rcu_dereference_bh(q->filter_list);
199 if (!fl) { 178 if (!fl)
200 skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
201 return sfq_hash(q, skb) + 1; 179 return sfq_hash(q, skb) + 1;
202 }
203 180
204 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 181 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
205 result = tc_classify(skb, fl, &res); 182 result = tc_classify(skb, fl, &res);
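
The simplified sfq_hash() masks with (q->divisor - 1), which only substitutes for a modulo when the divisor is a power of two; sfq enforces that elsewhere when the divisor is configured. A quick check of the equivalence:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t divisor = 1024;        /* SFQ's default hash divisor */
            uint32_t hash = 0x9e3779b9;

            /* For power-of-two divisors, masking equals the modulo. */
            printf("%u %u\n", hash & (divisor - 1), hash % divisor);
            return 0;
    }
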
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0e4198ee2370..e917d27328ea 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -331,8 +331,9 @@ out:
331 331
332 rt = (struct rt6_info *)dst; 332 rt = (struct rt6_info *)dst;
333 t->dst = dst; 333 t->dst = dst;
334 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 334 t->dst_cookie = rt6_get_cookie(rt);
335 pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, 335 pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
336 &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
336 &fl6->saddr); 337 &fl6->saddr);
337 } else { 338 } else {
338 t->dst = NULL; 339 t->dst = NULL;
@@ -635,7 +636,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
635 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 636 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
636 struct sctp6_sock *newsctp6sk; 637 struct sctp6_sock *newsctp6sk;
637 638
638 newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); 639 newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
639 if (!newsk) 640 if (!newsk)
640 goto out; 641 goto out;
641 642
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fc5e45b8a832..abe7c2db2412 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -599,7 +599,9 @@ out:
599 return err; 599 return err;
600no_route: 600no_route:
601 kfree_skb(nskb); 601 kfree_skb(nskb);
602 IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); 602
603 if (asoc)
604 IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
603 605
604 /* FIXME: Returning the 'err' will affect all the associations 606
605 * associated with a socket, although only one of the paths of the 607 * associated with a socket, although only one of the paths of the
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 53b7acde9aa3..59e80356672b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
550 struct sctp_association *asoc) 550 struct sctp_association *asoc)
551{ 551{
552 struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, 552 struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
553 sk->sk_prot); 553 sk->sk_prot, 0);
554 struct inet_sock *newinet; 554 struct inet_sock *newinet;
555 555
556 if (!newsk) 556 if (!newsk)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f09de7fac2e6..1425ec2bbd5a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1528,8 +1528,10 @@ static void sctp_close(struct sock *sk, long timeout)
1528 1528
1529 /* Supposedly, no process has access to the socket, but 1529 /* Supposedly, no process has access to the socket, but
1530 * the net layers still may. 1530 * the net layers still may.
1531 * Also, sctp_destroy_sock() needs to be called with addr_wq_lock
1532 * held and that should be grabbed before socket lock.
1531 */ 1533 */
1532 local_bh_disable(); 1534 spin_lock_bh(&net->sctp.addr_wq_lock);
1533 bh_lock_sock(sk); 1535 bh_lock_sock(sk);
1534 1536
1535 /* Hold the sock, since sk_common_release() will put sock_put() 1537 /* Hold the sock, since sk_common_release() will put sock_put()
@@ -1539,7 +1541,7 @@ static void sctp_close(struct sock *sk, long timeout)
1539 sk_common_release(sk); 1541 sk_common_release(sk);
1540 1542
1541 bh_unlock_sock(sk); 1543 bh_unlock_sock(sk);
1542 local_bh_enable(); 1544 spin_unlock_bh(&net->sctp.addr_wq_lock);
1543 1545
1544 sock_put(sk); 1546 sock_put(sk);
1545 1547
@@ -2119,12 +2121,6 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
2119 if (sp->subscribe.sctp_data_io_event) 2121 if (sp->subscribe.sctp_data_io_event)
2120 sctp_ulpevent_read_sndrcvinfo(event, msg); 2122 sctp_ulpevent_read_sndrcvinfo(event, msg);
2121 2123
2122#if 0
2123 /* FIXME: we should be calling IP/IPv6 layers. */
2124 if (sk->sk_protinfo.af_inet.cmsg_flags)
2125 ip_cmsg_recv(msg, skb);
2126#endif
2127
2128 err = copied; 2124 err = copied;
2129 2125
2130 /* If skb's length exceeds the user's buffer, update the skb and 2126 /* If skb's length exceeds the user's buffer, update the skb and
@@ -3580,6 +3576,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
3580 if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) 3576 if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf))
3581 return 0; 3577 return 0;
3582 3578
3579 spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
3583 if (val == 0 && sp->do_auto_asconf) { 3580 if (val == 0 && sp->do_auto_asconf) {
3584 list_del(&sp->auto_asconf_list); 3581 list_del(&sp->auto_asconf_list);
3585 sp->do_auto_asconf = 0; 3582 sp->do_auto_asconf = 0;
@@ -3588,6 +3585,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
3588 &sock_net(sk)->sctp.auto_asconf_splist); 3585 &sock_net(sk)->sctp.auto_asconf_splist);
3589 sp->do_auto_asconf = 1; 3586 sp->do_auto_asconf = 1;
3590 } 3587 }
3588 spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock);
3591 return 0; 3589 return 0;
3592} 3590}
3593 3591
@@ -4121,18 +4119,28 @@ static int sctp_init_sock(struct sock *sk)
4121 local_bh_disable(); 4119 local_bh_disable();
4122 percpu_counter_inc(&sctp_sockets_allocated); 4120 percpu_counter_inc(&sctp_sockets_allocated);
4123 sock_prot_inuse_add(net, sk->sk_prot, 1); 4121 sock_prot_inuse_add(net, sk->sk_prot, 1);
4122
4123 /* Nothing can fail after this block, otherwise
4124 * sctp_destroy_sock() will be called without addr_wq_lock held
4125 */
4124 if (net->sctp.default_auto_asconf) { 4126 if (net->sctp.default_auto_asconf) {
4127 spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
4125 list_add_tail(&sp->auto_asconf_list, 4128 list_add_tail(&sp->auto_asconf_list,
4126 &net->sctp.auto_asconf_splist); 4129 &net->sctp.auto_asconf_splist);
4127 sp->do_auto_asconf = 1; 4130 sp->do_auto_asconf = 1;
4128 } else 4131 spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
4132 } else {
4129 sp->do_auto_asconf = 0; 4133 sp->do_auto_asconf = 0;
4134 }
4135
4130 local_bh_enable(); 4136 local_bh_enable();
4131 4137
4132 return 0; 4138 return 0;
4133} 4139}
4134 4140
4135/* Cleanup any SCTP per socket resources. */ 4141/* Cleanup any SCTP per socket resources. Must be called with
4142 * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true
4143 */
4136static void sctp_destroy_sock(struct sock *sk) 4144static void sctp_destroy_sock(struct sock *sk)
4137{ 4145{
4138 struct sctp_sock *sp; 4146 struct sctp_sock *sp;
@@ -7195,6 +7203,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
7195 newinet->mc_list = NULL; 7203 newinet->mc_list = NULL;
7196} 7204}
7197 7205
7206static inline void sctp_copy_descendant(struct sock *sk_to,
7207 const struct sock *sk_from)
7208{
7209 int ancestor_size = sizeof(struct inet_sock) +
7210 sizeof(struct sctp_sock) -
7211 offsetof(struct sctp_sock, auto_asconf_list);
7212
7213 if (sk_from->sk_family == PF_INET6)
7214 ancestor_size += sizeof(struct ipv6_pinfo);
7215
7216 __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
7217}
7218
7198/* Populate the fields of the newsk from the oldsk and migrate the assoc 7219/* Populate the fields of the newsk from the oldsk and migrate the assoc
7199 * and its messages to the newsk. 7220 * and its messages to the newsk.
7200 */ 7221 */
@@ -7209,7 +7230,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
7209 struct sk_buff *skb, *tmp; 7230 struct sk_buff *skb, *tmp;
7210 struct sctp_ulpevent *event; 7231 struct sctp_ulpevent *event;
7211 struct sctp_bind_hashbucket *head; 7232 struct sctp_bind_hashbucket *head;
7212 struct list_head tmplist;
7213 7233
7214 /* Migrate socket buffer sizes and all the socket level options to the 7234 /* Migrate socket buffer sizes and all the socket level options to the
7215 * new socket. 7235 * new socket.
@@ -7217,12 +7237,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
7217 newsk->sk_sndbuf = oldsk->sk_sndbuf; 7237 newsk->sk_sndbuf = oldsk->sk_sndbuf;
7218 newsk->sk_rcvbuf = oldsk->sk_rcvbuf; 7238 newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
7219 /* Brute force copy old sctp opt. */ 7239 /* Brute force copy old sctp opt. */
7220 if (oldsp->do_auto_asconf) { 7240 sctp_copy_descendant(newsk, oldsk);
7221 memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist));
7222 inet_sk_copy_descendant(newsk, oldsk);
7223 memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist));
7224 } else
7225 inet_sk_copy_descendant(newsk, oldsk);
7226 7241
7227 /* Restore the ep value that was overwritten with the above structure 7242 /* Restore the ep value that was overwritten with the above structure
7228 * copy. 7243 * copy.
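
The sctp_close()/sctp_setsockopt_auto_asconf() changes above institute a fixed order: addr_wq_lock is always taken before the socket lock, so sctp_destroy_sock() can rely on it being held. A generic userspace illustration of why a fixed acquisition order prevents ABBA deadlock (build with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t addr_wq_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t sock_lock    = PTHREAD_MUTEX_INITIALIZER;

    /* Both paths take addr_wq_lock first, then the socket lock -- the
     * ordering the patch establishes. If one path reversed the order,
     * two threads could each hold one lock and wait on the other. */
    static void *path(void *name)
    {
            pthread_mutex_lock(&addr_wq_lock);
            pthread_mutex_lock(&sock_lock);
            printf("%s: holding both locks\n", (char *)name);
            pthread_mutex_unlock(&sock_lock);
            pthread_mutex_unlock(&addr_wq_lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_create(&a, NULL, path, "close");
            pthread_create(&b, NULL, path, "setsockopt");
            pthread_join(a, NULL);
            pthread_join(b, NULL);
            return 0;
    }
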
diff --git a/net/socket.c b/net/socket.c
index 884e32997698..9963a0b53a64 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -576,9 +576,6 @@ void sock_release(struct socket *sock)
576 if (rcu_dereference_protected(sock->wq, 1)->fasync_list) 576 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
577 pr_err("%s: fasync list not empty!\n", __func__); 577 pr_err("%s: fasync list not empty!\n", __func__);
578 578
579 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
580 return;
581
582 this_cpu_sub(sockets_in_use, 1); 579 this_cpu_sub(sockets_in_use, 1);
583 if (!sock->file) { 580 if (!sock->file) {
584 iput(SOCK_INODE(sock)); 581 iput(SOCK_INODE(sock));
@@ -1213,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res)
1213} 1210}
1214EXPORT_SYMBOL(sock_create); 1211EXPORT_SYMBOL(sock_create);
1215 1212
1216int sock_create_kern(int family, int type, int protocol, struct socket **res) 1213int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
1217{ 1214{
1218 return __sock_create(&init_net, family, type, protocol, res, 1); 1215 return __sock_create(net, family, type, protocol, res, 1);
1219} 1216}
1220EXPORT_SYMBOL(sock_create_kern); 1217EXPORT_SYMBOL(sock_create_kern);
1221 1218
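
sock_create_kern() grows a struct net argument above, so kernel sockets can be created in a specific namespace instead of always landing in init_net. A minimal module-context sketch of the new call, assuming the updated signature, with error handling trimmed to the essentials:

    #include <linux/module.h>
    #include <linux/net.h>
    #include <linux/in.h>
    #include <net/net_namespace.h>

    static struct socket *sock;

    static int __init demo_init(void)
    {
            /* Callers now pass the namespace explicitly; passing
             * &init_net keeps the old behaviour. */
            return sock_create_kern(&init_net, PF_INET, SOCK_DGRAM,
                                    IPPROTO_UDP, &sock);
    }

    static void __exit demo_exit(void)
    {
            sock_release(sock);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
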
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9068e72aa73c..04ce2c0b660e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -48,28 +48,16 @@ config SUNRPC_DEBUG
48 48
49 If unsure, say Y. 49 If unsure, say Y.
50 50
51config SUNRPC_XPRT_RDMA_CLIENT 51config SUNRPC_XPRT_RDMA
52 tristate "RPC over RDMA Client Support" 52 tristate "RPC-over-RDMA transport"
53 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS 53 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
54 default SUNRPC && INFINIBAND 54 default SUNRPC && INFINIBAND
55 help 55 help
56 This option allows the NFS client to support an RDMA-enabled 56 This option allows the NFS client and server to use RDMA
57 transport. 57 transports (InfiniBand, iWARP, or RoCE).
58 58
59 To compile RPC client RDMA transport support as a module, 59 To compile this support as a module, choose M. The module
60 choose M here: the module will be called xprtrdma. 60 will be called rpcrdma.ko.
61 61
62 If unsure, say N. 62 If unsure, or you know there is no RDMA capability on your
63 63 hardware platform, say N.
64config SUNRPC_XPRT_RDMA_SERVER
65 tristate "RPC over RDMA Server Support"
66 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
67 default SUNRPC && INFINIBAND
68 help
69 This option allows the NFS server to support an RDMA-enabled
70 transport.
71
72 To compile RPC server RDMA transport support as a module,
73 choose M here: the module will be called svcrdma.
74
75 If unsure, say N.
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 15e6f6c23c5d..b512fbd9d79a 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,8 +5,7 @@
5 5
6obj-$(CONFIG_SUNRPC) += sunrpc.o 6obj-$(CONFIG_SUNRPC) += sunrpc.o
7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ 7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
8 8obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
9obj-y += xprtrdma/
10 9
11sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ 10sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
12 auth.o auth_null.o auth_unix.o auth_generic.o \ 11 auth.o auth_null.o auth_unix.o auth_generic.o \
@@ -15,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
15 sunrpc_syms.o cache.o rpc_pipe.o \ 14 sunrpc_syms.o cache.o rpc_pipe.o \
16 svc_xprt.o 15 svc_xprt.o
17sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o 16sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
18sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o 17sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
19sunrpc-$(CONFIG_PROC_FS) += stats.o 18sunrpc-$(CONFIG_PROC_FS) += stats.o
20sunrpc-$(CONFIG_SYSCTL) += sysctl.o 19sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 47f38be4155f..02f53674dc39 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -72,7 +72,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
72 72
73#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); 73#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
74 74
75static struct kernel_param_ops param_ops_hashtbl_sz = { 75static const struct kernel_param_ops param_ops_hashtbl_sz = {
76 .set = param_set_hashtbl_sz, 76 .set = param_set_hashtbl_sz,
77 .get = param_get_hashtbl_sz, 77 .get = param_get_hashtbl_sz,
78}; 78};
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index b5408e8a37f2..fee3c15a4b52 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
881 if (err) 881 if (err)
882 goto out_err; 882 goto out_err;
883 883
884 sg_init_table(sg, 1); 884 sg_init_one(sg, &zeroconstant, 4);
885 sg_set_buf(sg, &zeroconstant, 4);
886
887 err = crypto_hash_digest(&desc, sg, 4, Kseq); 885 err = crypto_hash_digest(&desc, sg, 4, Kseq);
888 if (err) 886 if (err)
889 goto out_err; 887 goto out_err;
@@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
951 if (err) 949 if (err)
952 goto out_err; 950 goto out_err;
953 951
954 sg_init_table(sg, 1); 952 sg_init_one(sg, zeroconstant, 4);
955 sg_set_buf(sg, zeroconstant, 4);
956
957 err = crypto_hash_digest(&desc, sg, 4, Kcrypt); 953 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
958 if (err) 954 if (err)
959 goto out_err; 955 goto out_err;
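
The two krb5 hunks above fold sg_init_table() plus sg_set_buf() into the single-entry convenience wrapper, which is exactly what sg_init_one() does in lib/scatterlist.c:

    void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
    {
            sg_init_table(sg, 1);
            sg_set_buf(sg, buf, buflen);
    }
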
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 9dd0ea8db463..9825ff0f91d6 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */ 37 */
38static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) 38static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
39{ 39{
40 return xprt->bc_alloc_count > 0; 40 return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
41} 41}
42 42
43static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) 43static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
44{ 44{
45 atomic_add(n, &xprt->bc_free_slots);
45 xprt->bc_alloc_count += n; 46 xprt->bc_alloc_count += n;
46} 47}
47 48
48static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) 49static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
49{ 50{
51 atomic_sub(n, &xprt->bc_free_slots);
50 return xprt->bc_alloc_count -= n; 52 return xprt->bc_alloc_count -= n;
51} 53}
52 54
@@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req)
60 62
61 dprintk("RPC: free allocations for req= %p\n", req); 63 dprintk("RPC: free allocations for req= %p\n", req);
62 WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); 64 WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
63 xbufp = &req->rq_private_buf; 65 xbufp = &req->rq_rcv_buf;
64 free_page((unsigned long)xbufp->head[0].iov_base); 66 free_page((unsigned long)xbufp->head[0].iov_base);
65 xbufp = &req->rq_snd_buf; 67 xbufp = &req->rq_snd_buf;
66 free_page((unsigned long)xbufp->head[0].iov_base); 68 free_page((unsigned long)xbufp->head[0].iov_base);
67 kfree(req); 69 kfree(req);
68} 70}
69 71
72static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
73{
74 struct page *page;
75 /* Preallocate one XDR receive buffer */
76 page = alloc_page(gfp_flags);
77 if (page == NULL)
78 return -ENOMEM;
79 buf->head[0].iov_base = page_address(page);
80 buf->head[0].iov_len = PAGE_SIZE;
81 buf->tail[0].iov_base = NULL;
82 buf->tail[0].iov_len = 0;
83 buf->page_len = 0;
84 buf->len = 0;
85 buf->buflen = PAGE_SIZE;
86 return 0;
87}
88
89static
90struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
91{
92 struct rpc_rqst *req;
93
94 /* Pre-allocate one backchannel rpc_rqst */
95 req = kzalloc(sizeof(*req), gfp_flags);
96 if (req == NULL)
97 return NULL;
98
99 req->rq_xprt = xprt;
100 INIT_LIST_HEAD(&req->rq_list);
101 INIT_LIST_HEAD(&req->rq_bc_list);
102
103 /* Preallocate one XDR receive buffer */
104 if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
105 printk(KERN_ERR "Failed to create bc receive xbuf\n");
106 goto out_free;
107 }
108 req->rq_rcv_buf.len = PAGE_SIZE;
109
110 /* Preallocate one XDR send buffer */
111 if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) {
112 printk(KERN_ERR "Failed to create bc snd xbuf\n");
113 goto out_free;
114 }
115 return req;
116out_free:
117 xprt_free_allocation(req);
118 return NULL;
119}
120
70/* 121/*
71 * Preallocate up to min_reqs structures and related buffers for use 122 * Preallocate up to min_reqs structures and related buffers for use
72 * by the backchannel. This function can be called multiple times 123 * by the backchannel. This function can be called multiple times
@@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req)
87 */ 138 */
88int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) 139int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
89{ 140{
90 struct page *page_rcv = NULL, *page_snd = NULL; 141 struct rpc_rqst *req;
91 struct xdr_buf *xbufp = NULL;
92 struct rpc_rqst *req, *tmp;
93 struct list_head tmp_list; 142 struct list_head tmp_list;
94 int i; 143 int i;
95 144
@@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
106 INIT_LIST_HEAD(&tmp_list); 155 INIT_LIST_HEAD(&tmp_list);
107 for (i = 0; i < min_reqs; i++) { 156 for (i = 0; i < min_reqs; i++) {
108 /* Pre-allocate one backchannel rpc_rqst */ 157 /* Pre-allocate one backchannel rpc_rqst */
109 req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); 158 req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
110 if (req == NULL) { 159 if (req == NULL) {
111 printk(KERN_ERR "Failed to create bc rpc_rqst\n"); 160 printk(KERN_ERR "Failed to create bc rpc_rqst\n");
112 goto out_free; 161 goto out_free;
@@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
115 /* Add the allocated buffer to the tmp list */ 164 /* Add the allocated buffer to the tmp list */
116 dprintk("RPC: adding req= %p\n", req); 165 dprintk("RPC: adding req= %p\n", req);
117 list_add(&req->rq_bc_pa_list, &tmp_list); 166 list_add(&req->rq_bc_pa_list, &tmp_list);
118
119 req->rq_xprt = xprt;
120 INIT_LIST_HEAD(&req->rq_list);
121 INIT_LIST_HEAD(&req->rq_bc_list);
122
123 /* Preallocate one XDR receive buffer */
124 page_rcv = alloc_page(GFP_KERNEL);
125 if (page_rcv == NULL) {
126 printk(KERN_ERR "Failed to create bc receive xbuf\n");
127 goto out_free;
128 }
129 xbufp = &req->rq_rcv_buf;
130 xbufp->head[0].iov_base = page_address(page_rcv);
131 xbufp->head[0].iov_len = PAGE_SIZE;
132 xbufp->tail[0].iov_base = NULL;
133 xbufp->tail[0].iov_len = 0;
134 xbufp->page_len = 0;
135 xbufp->len = PAGE_SIZE;
136 xbufp->buflen = PAGE_SIZE;
137
138 /* Preallocate one XDR send buffer */
139 page_snd = alloc_page(GFP_KERNEL);
140 if (page_snd == NULL) {
141 printk(KERN_ERR "Failed to create bc snd xbuf\n");
142 goto out_free;
143 }
144
145 xbufp = &req->rq_snd_buf;
146 xbufp->head[0].iov_base = page_address(page_snd);
147 xbufp->head[0].iov_len = 0;
148 xbufp->tail[0].iov_base = NULL;
149 xbufp->tail[0].iov_len = 0;
150 xbufp->page_len = 0;
151 xbufp->len = 0;
152 xbufp->buflen = PAGE_SIZE;
153 } 167 }
154 168
155 /* 169 /*
@@ -167,7 +181,10 @@ out_free:
167 /* 181 /*
168 * Memory allocation failed, free the temporary list 182 * Memory allocation failed, free the temporary list
169 */ 183 */
170 list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { 184 while (!list_empty(&tmp_list)) {
185 req = list_first_entry(&tmp_list,
186 struct rpc_rqst,
187 rq_bc_pa_list);
171 list_del(&req->rq_bc_pa_list); 188 list_del(&req->rq_bc_pa_list);
172 xprt_free_allocation(req); 189 xprt_free_allocation(req);
173 } 190 }
@@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
217 struct rpc_rqst *req = NULL; 234 struct rpc_rqst *req = NULL;
218 235
219 dprintk("RPC: allocate a backchannel request\n"); 236 dprintk("RPC: allocate a backchannel request\n");
220 if (list_empty(&xprt->bc_pa_list)) 237 if (atomic_read(&xprt->bc_free_slots) <= 0)
221 goto not_found; 238 goto not_found;
222 239 if (list_empty(&xprt->bc_pa_list)) {
240 req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
241 if (!req)
242 goto not_found;
243 /* Note: this 'free' request adds it to xprt->bc_pa_list */
244 xprt_free_bc_request(req);
245 }
223 req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, 246 req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
224 rq_bc_pa_list); 247 rq_bc_pa_list);
225 req->rq_reply_bytes_recvd = 0; 248 req->rq_reply_bytes_recvd = 0;
@@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req)
245 268
246 req->rq_connect_cookie = xprt->connect_cookie - 1; 269 req->rq_connect_cookie = xprt->connect_cookie - 1;
247 smp_mb__before_atomic(); 270 smp_mb__before_atomic();
248 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
249 clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); 271 clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
250 smp_mb__after_atomic(); 272 smp_mb__after_atomic();
251 273
252 if (!xprt_need_to_requeue(xprt)) { 274 /*
275 * Return it to the list of preallocations so that it
276 * may be reused by a new callback request.
277 */
278 spin_lock_bh(&xprt->bc_pa_lock);
279 if (xprt_need_to_requeue(xprt)) {
280 list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
281 xprt->bc_alloc_count++;
282 req = NULL;
283 }
284 spin_unlock_bh(&xprt->bc_pa_lock);
285 if (req != NULL) {
253 /* 286 /*
254 * The last remaining session was destroyed while this 287 * The last remaining session was destroyed while this
255 * entry was in use. Free the entry and don't attempt 288 * entry was in use. Free the entry and don't attempt
@@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req)
260 xprt_free_allocation(req); 293 xprt_free_allocation(req);
261 return; 294 return;
262 } 295 }
263
264 /*
265 * Return it to the list of preallocations so that it
266 * may be reused by a new callback request.
267 */
268 spin_lock_bh(&xprt->bc_pa_lock);
269 list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
270 spin_unlock_bh(&xprt->bc_pa_lock);
271} 296}
272 297
273/* 298/*
@@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
311 336
312 spin_lock(&xprt->bc_pa_lock); 337 spin_lock(&xprt->bc_pa_lock);
313 list_del(&req->rq_bc_pa_list); 338 list_del(&req->rq_bc_pa_list);
339 xprt->bc_alloc_count--;
314 spin_unlock(&xprt->bc_pa_lock); 340 spin_unlock(&xprt->bc_pa_lock);
315 341
316 req->rq_private_buf.len = copied; 342 req->rq_private_buf.len = copied;
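
The error path in xprt_setup_backchannel() above switches from list_for_each_entry_safe() to a while-not-empty/list_first_entry() drain. Both are safe here; the pop-first form also stays correct if the free routine were ever to requeue entries. The shape of the pattern on a minimal userspace list, using the BSD <sys/queue.h> macros and a hypothetical node type:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct req {                        /* hypothetical stand-in for rpc_rqst */
            LIST_ENTRY(req) link;
            int id;
    };

    LIST_HEAD(req_list, req);

    int main(void)
    {
            struct req_list tmp_list = LIST_HEAD_INITIALIZER(tmp_list);

            for (int i = 0; i < 3; i++) {
                    struct req *r = malloc(sizeof(*r));
                    r->id = i;
                    LIST_INSERT_HEAD(&tmp_list, r, link);
            }

            /* Drain by always popping the first entry, mirroring the
             * while/list_first_entry() loop in the hunk above. */
            while (!LIST_EMPTY(&tmp_list)) {
                    struct req *r = LIST_FIRST(&tmp_list);
                    LIST_REMOVE(r, link);
                    printf("freeing req %d\n", r->id);
                    free(r);
            }
            return 0;
    }
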
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
deleted file mode 100644
index 15c7a8a1c24f..000000000000
--- a/net/sunrpc/bc_svc.c
+++ /dev/null
@@ -1,63 +0,0 @@
1/******************************************************************************
2
3(c) 2007 Network Appliance, Inc. All Rights Reserved.
4(c) 2009 NetApp. All Rights Reserved.
5
6NetApp provides this source code under the GPL v2 License.
7The GPL v2 license is available at
8http://opensource.org/licenses/gpl-license.php.
9
10THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
11"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
12LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
13A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
14CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
15EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
17PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
18LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
19NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
20SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21
22******************************************************************************/
23
24/*
25 * The NFSv4.1 callback service helper routines.
26 * They implement the transport level processing required to send the
27 * reply over an existing open connection previously established by the client.
28 */
29
30#include <linux/module.h>
31
32#include <linux/sunrpc/xprt.h>
33#include <linux/sunrpc/sched.h>
34#include <linux/sunrpc/bc_xprt.h>
35
36#define RPCDBG_FACILITY RPCDBG_SVCDSP
37
38/* Empty callback ops */
39static const struct rpc_call_ops nfs41_callback_ops = {
40};
41
42
43/*
44 * Send the callback reply
45 */
46int bc_send(struct rpc_rqst *req)
47{
48 struct rpc_task *task;
49 int ret;
50
51 dprintk("RPC: bc_send req= %p\n", req);
52 task = rpc_run_bc_task(req, &nfs41_callback_ops);
53 if (IS_ERR(task))
54 ret = PTR_ERR(task);
55 else {
56 WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
57 ret = task->tk_status;
58 rpc_put_task(task);
59 }
60 dprintk("RPC: bc_send ret= %d\n", ret);
61 return ret;
62}
63
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e6ce1517367f..cbc6af923dd1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
891 task->tk_flags |= RPC_TASK_SOFT; 891 task->tk_flags |= RPC_TASK_SOFT;
892 if (clnt->cl_noretranstimeo) 892 if (clnt->cl_noretranstimeo)
893 task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; 893 task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
894 if (sk_memalloc_socks()) { 894 if (atomic_read(&clnt->cl_swapper))
895 struct rpc_xprt *xprt; 895 task->tk_flags |= RPC_TASK_SWAPPER;
896
897 rcu_read_lock();
898 xprt = rcu_dereference(clnt->cl_xprt);
899 if (xprt->swapper)
900 task->tk_flags |= RPC_TASK_SWAPPER;
901 rcu_read_unlock();
902 }
903 /* Add to the client's list of all tasks */ 896 /* Add to the client's list of all tasks */
904 spin_lock(&clnt->cl_lock); 897 spin_lock(&clnt->cl_lock);
905 list_add_tail(&task->tk_task, &clnt->cl_tasks); 898 list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
1031 * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run 1024 * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
1032 * rpc_execute against it 1025 * rpc_execute against it
1033 * @req: RPC request 1026 * @req: RPC request
1034 * @tk_ops: RPC call ops
1035 */ 1027 */
1036struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, 1028struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
1037 const struct rpc_call_ops *tk_ops)
1038{ 1029{
1039 struct rpc_task *task; 1030 struct rpc_task *task;
1040 struct xdr_buf *xbufp = &req->rq_snd_buf; 1031 struct xdr_buf *xbufp = &req->rq_snd_buf;
1041 struct rpc_task_setup task_setup_data = { 1032 struct rpc_task_setup task_setup_data = {
1042 .callback_ops = tk_ops, 1033 .callback_ops = &rpc_default_ops,
1034 .flags = RPC_TASK_SOFTCONN,
1043 }; 1035 };
1044 1036
1045 dprintk("RPC: rpc_run_bc_task req= %p\n", req); 1037 dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task)
1614 req->rq_callsize + req->rq_rcvsize); 1606 req->rq_callsize + req->rq_rcvsize);
1615 if (req->rq_buffer != NULL) 1607 if (req->rq_buffer != NULL)
1616 return; 1608 return;
1609 xprt_inject_disconnect(xprt);
1617 1610
1618 dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); 1611 dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
1619 1612
@@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task)
1951{ 1944{
1952 struct rpc_rqst *req = task->tk_rqstp; 1945 struct rpc_rqst *req = task->tk_rqstp;
1953 1946
1954 if (!xprt_prepare_transmit(task)) { 1947 if (!xprt_prepare_transmit(task))
1955 /* 1948 goto out_retry;
1956 * Could not reserve the transport. Try again after the
1957 * transport is released.
1958 */
1959 task->tk_status = 0;
1960 task->tk_action = call_bc_transmit;
1961 return;
1962 }
1963 1949
1964 task->tk_action = rpc_exit_task;
1965 if (task->tk_status < 0) { 1950 if (task->tk_status < 0) {
1966 printk(KERN_NOTICE "RPC: Could not send backchannel reply " 1951 printk(KERN_NOTICE "RPC: Could not send backchannel reply "
1967 "error: %d\n", task->tk_status); 1952 "error: %d\n", task->tk_status);
1968 return; 1953 goto out_done;
1969 } 1954 }
1955 if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
1956 req->rq_bytes_sent = 0;
1970 1957
1971 xprt_transmit(task); 1958 xprt_transmit(task);
1959
1960 if (task->tk_status == -EAGAIN)
1961 goto out_nospace;
1962
1972 xprt_end_transmit(task); 1963 xprt_end_transmit(task);
1973 dprint_status(task); 1964 dprint_status(task);
1974 switch (task->tk_status) { 1965 switch (task->tk_status) {
1975 case 0: 1966 case 0:
1976 /* Success */ 1967 /* Success */
1977 break;
1978 case -EHOSTDOWN: 1968 case -EHOSTDOWN:
1979 case -EHOSTUNREACH: 1969 case -EHOSTUNREACH:
1980 case -ENETUNREACH: 1970 case -ENETUNREACH:
1971 case -ECONNRESET:
1972 case -ECONNREFUSED:
1973 case -EADDRINUSE:
1974 case -ENOTCONN:
1975 case -EPIPE:
1976 break;
1981 case -ETIMEDOUT: 1977 case -ETIMEDOUT:
1982 /* 1978 /*
1983 * Problem reaching the server. Disconnect and let the 1979 * Problem reaching the server. Disconnect and let the
@@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task)
2002 break; 1998 break;
2003 } 1999 }
2004 rpc_wake_up_queued_task(&req->rq_xprt->pending, task); 2000 rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
2001out_done:
2002 task->tk_action = rpc_exit_task;
2003 return;
2004out_nospace:
2005 req->rq_connect_cookie = req->rq_xprt->connect_cookie;
2006out_retry:
2007 task->tk_status = 0;
2005} 2008}
2006#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 2009#endif /* CONFIG_SUNRPC_BACKCHANNEL */
2007 2010
@@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net)
2476 spin_unlock(&sn->rpc_client_lock); 2479 spin_unlock(&sn->rpc_client_lock);
2477} 2480}
2478#endif 2481#endif
2482
2483#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
2484int
2485rpc_clnt_swap_activate(struct rpc_clnt *clnt)
2486{
2487 int ret = 0;
2488 struct rpc_xprt *xprt;
2489
2490 if (atomic_inc_return(&clnt->cl_swapper) == 1) {
2491retry:
2492 rcu_read_lock();
2493 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
2494 rcu_read_unlock();
2495 if (!xprt) {
2496 /*
2497 * If we didn't get a reference, then we likely are
2498 * racing with a migration event. Wait for a grace
2499 * period and try again.
2500 */
2501 synchronize_rcu();
2502 goto retry;
2503 }
2504
2505 ret = xprt_enable_swap(xprt);
2506 xprt_put(xprt);
2507 }
2508 return ret;
2509}
2510EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
2511
2512void
2513rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
2514{
2515 struct rpc_xprt *xprt;
2516
2517 if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) {
2518retry:
2519 rcu_read_lock();
2520 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
2521 rcu_read_unlock();
2522 if (!xprt) {
2523 /*
2524 * If we didn't get a reference, then we likely are
2525 * racing with a migration event. Wait for a grace
2526 * period and try again.
2527 */
2528 synchronize_rcu();
2529 goto retry;
2530 }
2531
2532 xprt_disable_swap(xprt);
2533 xprt_put(xprt);
2534 }
2535}
2536EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
2537#endif /* CONFIG_SUNRPC_SWAP */
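
rpc_clnt_swap_activate()/rpc_clnt_swap_deactivate() above gate the per-transport work on counter transitions: only the 0->1 increment enables swap and only the drop back to 0 disables it, so nested activations are cheap. The transition test in isolation with C11 atomics (the kernel's deactivate side uses atomic_dec_if_positive(), which additionally refuses to go negative; that guard is elided here):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int cl_swapper;

    static void activate(void)
    {
            /* fetch_add returns the old value, so old == 0 marks the
             * 0 -> 1 transition -- the only caller that does real work. */
            if (atomic_fetch_add(&cl_swapper, 1) == 0)
                    printf("enable swap on the transport\n");
    }

    static void deactivate(void)
    {
            if (atomic_fetch_sub(&cl_swapper, 1) == 1)
                    printf("disable swap on the transport\n");
    }

    int main(void)
    {
            activate();     /* enables */
            activate();     /* nested: no-op */
            deactivate();   /* still one user: no-op */
            deactivate();   /* disables */
            return 0;
    }
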
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 82962f7e6e88..e7b4d93566df 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -10,9 +10,12 @@
10#include "netns.h" 10#include "netns.h"
11 11
12static struct dentry *topdir; 12static struct dentry *topdir;
13static struct dentry *rpc_fault_dir;
13static struct dentry *rpc_clnt_dir; 14static struct dentry *rpc_clnt_dir;
14static struct dentry *rpc_xprt_dir; 15static struct dentry *rpc_xprt_dir;
15 16
17unsigned int rpc_inject_disconnect;
18
16struct rpc_clnt_iter { 19struct rpc_clnt_iter {
17 struct rpc_clnt *clnt; 20 struct rpc_clnt *clnt;
18 loff_t pos; 21 loff_t pos;
@@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
257 debugfs_remove_recursive(xprt->debugfs); 260 debugfs_remove_recursive(xprt->debugfs);
258 xprt->debugfs = NULL; 261 xprt->debugfs = NULL;
259 } 262 }
263
264 atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
260} 265}
261 266
262void 267void
@@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
266 xprt->debugfs = NULL; 271 xprt->debugfs = NULL;
267} 272}
268 273
274static int
275fault_open(struct inode *inode, struct file *filp)
276{
277 filp->private_data = kmalloc(128, GFP_KERNEL);
278 if (!filp->private_data)
279 return -ENOMEM;
280 return 0;
281}
282
283static int
284fault_release(struct inode *inode, struct file *filp)
285{
286 kfree(filp->private_data);
287 return 0;
288}
289
290static ssize_t
291fault_disconnect_read(struct file *filp, char __user *user_buf,
292 size_t len, loff_t *offset)
293{
294 char *buffer = (char *)filp->private_data;
295 size_t size;
296
297 size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
298 return simple_read_from_buffer(user_buf, len, offset, buffer, size);
299}
300
301static ssize_t
302fault_disconnect_write(struct file *filp, const char __user *user_buf,
303 size_t len, loff_t *offset)
304{
305 char buffer[16];
306
307 if (len >= sizeof(buffer))
308 len = sizeof(buffer) - 1;
309 if (copy_from_user(buffer, user_buf, len))
310 return -EFAULT;
311 buffer[len] = '\0';
312 if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
313 return -EINVAL;
314 return len;
315}
316
317static const struct file_operations fault_disconnect_fops = {
318 .owner = THIS_MODULE,
319 .open = fault_open,
320 .read = fault_disconnect_read,
321 .write = fault_disconnect_write,
322 .release = fault_release,
323};
324
325static struct dentry *
326inject_fault_dir(struct dentry *topdir)
327{
328 struct dentry *faultdir;
329
330 faultdir = debugfs_create_dir("inject_fault", topdir);
331 if (!faultdir)
332 return NULL;
333
334 if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
335 NULL, &fault_disconnect_fops))
336 return NULL;
337
338 return faultdir;
339}
340
269void __exit 341void __exit
270sunrpc_debugfs_exit(void) 342sunrpc_debugfs_exit(void)
271{ 343{
272 debugfs_remove_recursive(topdir); 344 debugfs_remove_recursive(topdir);
273 topdir = NULL; 345 topdir = NULL;
346 rpc_fault_dir = NULL;
274 rpc_clnt_dir = NULL; 347 rpc_clnt_dir = NULL;
275 rpc_xprt_dir = NULL; 348 rpc_xprt_dir = NULL;
276} 349}
@@ -282,6 +355,10 @@ sunrpc_debugfs_init(void)
282 if (!topdir) 355 if (!topdir)
283 return; 356 return;
284 357
358 rpc_fault_dir = inject_fault_dir(topdir);
359 if (!rpc_fault_dir)
360 goto out_remove;
361
285 rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); 362 rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
286 if (!rpc_clnt_dir) 363 if (!rpc_clnt_dir)
287 goto out_remove; 364 goto out_remove;
@@ -294,5 +371,6 @@ sunrpc_debugfs_init(void)
294out_remove: 371out_remove:
295 debugfs_remove_recursive(topdir); 372 debugfs_remove_recursive(topdir);
296 topdir = NULL; 373 topdir = NULL;
374 rpc_fault_dir = NULL;
297 rpc_clnt_dir = NULL; 375 rpc_clnt_dir = NULL;
298} 376}
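
The fault-injection file added above formats the counter into a small private buffer and hands the bounds/offset bookkeeping to simple_read_from_buffer(). Its contract, reproduced in a userspace form (the kernel version also copies to user space, which is elided here):

    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>

    /* Userspace rendition of simple_read_from_buffer(): copy at most
     * `len` bytes of the `avail`-byte source starting at *ppos, then
     * advance *ppos by however much was copied. */
    static ssize_t simple_read(char *to, size_t len, off_t *ppos,
                               const char *from, size_t avail)
    {
            off_t pos = *ppos;

            if (pos < 0)
                    return -1;
            if ((size_t)pos >= avail || len == 0)
                    return 0;
            if (len > avail - pos)
                    len = avail - pos;
            memcpy(to, from + pos, len);
            *ppos = pos + len;
            return (ssize_t)len;
    }

    int main(void)
    {
            char src[16], dst[4];
            int n = snprintf(src, sizeof(src), "%u\n", 42u);
            off_t pos = 0;
            ssize_t got;

            /* Repeated reads consume "42\n" and then hit EOF (0). */
            while ((got = simple_read(dst, sizeof(dst), &pos, src, n)) > 0)
                    printf("read %zd byte(s)\n", got);
            return 0;
    }
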
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 78974e4d9ad2..5a16d8d8c831 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1290,7 +1290,6 @@ err_bad:
1290 svc_putnl(resv, ntohl(rpc_stat)); 1290 svc_putnl(resv, ntohl(rpc_stat));
1291 goto sendit; 1291 goto sendit;
1292} 1292}
1293EXPORT_SYMBOL_GPL(svc_process);
1294 1293
1295/* 1294/*
1296 * Process the RPC request. 1295 * Process the RPC request.
@@ -1338,6 +1337,7 @@ out_drop:
1338 svc_drop(rqstp); 1337 svc_drop(rqstp);
1339 return 0; 1338 return 0;
1340} 1339}
1340EXPORT_SYMBOL_GPL(svc_process);
1341 1341
1342#if defined(CONFIG_SUNRPC_BACKCHANNEL) 1342#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1343/* 1343/*
@@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1350{ 1350{
1351 struct kvec *argv = &rqstp->rq_arg.head[0]; 1351 struct kvec *argv = &rqstp->rq_arg.head[0];
1352 struct kvec *resv = &rqstp->rq_res.head[0]; 1352 struct kvec *resv = &rqstp->rq_res.head[0];
1353 struct rpc_task *task;
1354 int proc_error;
1355 int error;
1356
1357 dprintk("svc: %s(%p)\n", __func__, req);
1353 1358
1354 /* Build the svc_rqst used by the common processing routine */ 1359 /* Build the svc_rqst used by the common processing routine */
1355 rqstp->rq_xprt = serv->sv_bc_xprt; 1360 rqstp->rq_xprt = serv->sv_bc_xprt;
@@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1372 1377
1373 /* 1378 /*
1374 * Skip the next two words because they've already been 1379 * Skip the next two words because they've already been
1375 * processed in the trasport 1380 * processed in the transport
1376 */ 1381 */
1377 svc_getu32(argv); /* XID */ 1382 svc_getu32(argv); /* XID */
1378 svc_getnl(argv); /* CALLDIR */ 1383 svc_getnl(argv); /* CALLDIR */
1379 1384
1380 /* Returns 1 for send, 0 for drop */ 1385 /* Parse and execute the bc call */
1381 if (svc_process_common(rqstp, argv, resv)) { 1386 proc_error = svc_process_common(rqstp, argv, resv);
1382 memcpy(&req->rq_snd_buf, &rqstp->rq_res, 1387
1383 sizeof(req->rq_snd_buf)); 1388 atomic_inc(&req->rq_xprt->bc_free_slots);
1384 return bc_send(req); 1389 if (!proc_error) {
1385 } else { 1390 /* Processing error: drop the request */
1386 /* drop request */
1387 xprt_free_bc_request(req); 1391 xprt_free_bc_request(req);
1388 return 0; 1392 return 0;
1389 } 1393 }
1394
1395 /* Finally, send the reply synchronously */
1396 memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
1397 task = rpc_run_bc_task(req);
1398 if (IS_ERR(task)) {
1399 error = PTR_ERR(task);
1400 goto out;
1401 }
1402
1403 WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
1404 error = task->tk_status;
1405 rpc_put_task(task);
1406
1407out:
1408 dprintk("svc: %s(), error=%d\n", __func__, error);
1409 return error;
1390} 1410}
1391EXPORT_SYMBOL_GPL(bc_svc_process); 1411EXPORT_SYMBOL_GPL(bc_svc_process);
1392#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1412#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1d4fe24af06a..ab5dd621ae0c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net);
68static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 68static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
69static void xprt_connect_status(struct rpc_task *task); 69static void xprt_connect_status(struct rpc_task *task);
70static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 70static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
71static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
71static void xprt_destroy(struct rpc_xprt *xprt); 72static void xprt_destroy(struct rpc_xprt *xprt);
72 73
73static DEFINE_SPINLOCK(xprt_list_lock); 74static DEFINE_SPINLOCK(xprt_list_lock);
@@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
250 } 251 }
251 xprt_clear_locked(xprt); 252 xprt_clear_locked(xprt);
252out_sleep: 253out_sleep:
254 if (req)
255 __xprt_put_cong(xprt, req);
253 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 256 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
254 task->tk_timeout = 0; 257 task->tk_timeout = 0;
255 task->tk_status = -EAGAIN; 258 task->tk_status = -EAGAIN;
@@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work)
608 struct rpc_xprt *xprt = 611 struct rpc_xprt *xprt =
609 container_of(work, struct rpc_xprt, task_cleanup); 612 container_of(work, struct rpc_xprt, task_cleanup);
610 613
611 xprt->ops->close(xprt);
612 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 614 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
615 xprt->ops->close(xprt);
613 xprt_release_write(xprt, NULL); 616 xprt_release_write(xprt, NULL);
614} 617}
615 618
@@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task)
967 task->tk_status = status; 970 task->tk_status = status;
968 return; 971 return;
969 } 972 }
973 xprt_inject_disconnect(xprt);
970 974
971 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 975 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
972 task->tk_flags |= RPC_TASK_SENT; 976 task->tk_flags |= RPC_TASK_SENT;
@@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task)
1285 spin_unlock_bh(&xprt->transport_lock); 1289 spin_unlock_bh(&xprt->transport_lock);
1286 if (req->rq_buffer) 1290 if (req->rq_buffer)
1287 xprt->ops->buf_free(req->rq_buffer); 1291 xprt->ops->buf_free(req->rq_buffer);
1292 xprt_inject_disconnect(xprt);
1288 if (req->rq_cred != NULL) 1293 if (req->rq_cred != NULL)
1289 put_rpccred(req->rq_cred); 1294 put_rpccred(req->rq_cred);
1290 task->tk_rqstp = NULL; 1295 task->tk_rqstp = NULL;
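
The out_sleep change in xprt_reserve_xprt_cong() above releases the congestion slot that was just taken whenever the transport lock cannot be acquired. A toy model of why that matters (all names here are ours, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct xprt_model { int cong; int cwnd; bool locked; };

static bool get_cong(struct xprt_model *x)
{
	if (x->cong >= x->cwnd)
		return false;
	x->cong++;
	return true;
}

static void put_cong(struct xprt_model *x)
{
	x->cong--;
}

/*
 * Models xprt_reserve_xprt_cong(): when the transport lock cannot be
 * taken, the congestion slot acquired above must be handed back, or
 * every -EAGAIN retry leaves the window pinned at its limit.
 */
static int reserve_cong(struct xprt_model *x)
{
	if (!get_cong(x))
		return -11;             /* -EAGAIN */
	if (x->locked) {                /* out_sleep path */
		put_cong(x);            /* the fix above */
		return -11;
	}
	x->locked = true;
	return 0;
}

int main(void)
{
	struct xprt_model x = { .cwnd = 2, .locked = true };
	int i;

	for (i = 0; i < 5; i++)
		reserve_cong(&x);
	printf("slots still held after 5 failed attempts: %d\n", x.cong);
	return 0;
}

Without the put_cong() on the busy path, the five failed attempts would leave the toy window exhausted.
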
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 579f72bbcf4b..48913de240bd 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,9 +1,7 @@
1obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o 1obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
2 2
3xprtrdma-y := transport.o rpc_rdma.o verbs.o \ 3rpcrdma-y := transport.o rpc_rdma.o verbs.o \
4 fmr_ops.o frwr_ops.o physical_ops.o 4 fmr_ops.o frwr_ops.o physical_ops.o \
5 5 svc_rdma.o svc_rdma_transport.o \
6obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o 6 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
7 7 module.o
8svcrdma-y := svc_rdma.o svc_rdma_transport.o \
9 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 302d4ebf6fbf..f1e8dafbd507 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -11,6 +11,21 @@
11 * can take tens of usecs to complete. 11 * can take tens of usecs to complete.
12 */ 12 */
13 13
14/* Normal operation
15 *
16 * A Memory Region is prepared for RDMA READ or WRITE using the
17 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
18 * finished, the Memory Region is unmapped using the ib_unmap_fmr
19 * verb (fmr_op_unmap).
20 */
21
22/* Transport recovery
23 *
24 * After a transport reconnect, fmr_op_map re-uses the MR already
25 * allocated for the RPC, but generates a fresh rkey then maps the
26 * MR again. This process is synchronous.
27 */
28
14#include "xprt_rdma.h" 29#include "xprt_rdma.h"
15 30
16#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 31#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
50 struct rpcrdma_mw *r; 65 struct rpcrdma_mw *r;
51 int i, rc; 66 int i, rc;
52 67
68 spin_lock_init(&buf->rb_mwlock);
53 INIT_LIST_HEAD(&buf->rb_mws); 69 INIT_LIST_HEAD(&buf->rb_mws);
54 INIT_LIST_HEAD(&buf->rb_all); 70 INIT_LIST_HEAD(&buf->rb_all);
55 71
56 i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; 72 i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
57 dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); 73 i += 2; /* head + tail */
74 i *= buf->rb_max_requests; /* one set for each RPC slot */
 75 dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);
58 76
77 rc = -ENOMEM;
59 while (i--) { 78 while (i--) {
60 r = kzalloc(sizeof(*r), GFP_KERNEL); 79 r = kzalloc(sizeof(*r), GFP_KERNEL);
61 if (!r) 80 if (!r)
62 return -ENOMEM; 81 goto out;
63 82
64 r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); 83 r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
65 if (IS_ERR(r->r.fmr)) 84 sizeof(u64), GFP_KERNEL);
85 if (!r->r.fmr.physaddrs)
86 goto out_free;
87
88 r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
89 if (IS_ERR(r->r.fmr.fmr))
66 goto out_fmr_err; 90 goto out_fmr_err;
67 91
68 list_add(&r->mw_list, &buf->rb_mws); 92 list_add(&r->mw_list, &buf->rb_mws);
@@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
71 return 0; 95 return 0;
72 96
73out_fmr_err: 97out_fmr_err:
74 rc = PTR_ERR(r->r.fmr); 98 rc = PTR_ERR(r->r.fmr.fmr);
75 dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); 99 dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
100 kfree(r->r.fmr.physaddrs);
101out_free:
76 kfree(r); 102 kfree(r);
103out:
77 return rc; 104 return rc;
78} 105}
79 106
107static int
108__fmr_unmap(struct rpcrdma_mw *r)
109{
110 LIST_HEAD(l);
111
112 list_add(&r->r.fmr.fmr->list, &l);
113 return ib_unmap_fmr(&l);
114}
115
80/* Use the ib_map_phys_fmr() verb to register a memory region 116/* Use the ib_map_phys_fmr() verb to register a memory region
81 * for remote access via RDMA READ or RDMA WRITE. 117 * for remote access via RDMA READ or RDMA WRITE.
82 */ 118 */
@@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
85 int nsegs, bool writing) 121 int nsegs, bool writing)
86{ 122{
87 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 123 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
88 struct ib_device *device = ia->ri_id->device; 124 struct ib_device *device = ia->ri_device;
89 enum dma_data_direction direction = rpcrdma_data_dir(writing); 125 enum dma_data_direction direction = rpcrdma_data_dir(writing);
90 struct rpcrdma_mr_seg *seg1 = seg; 126 struct rpcrdma_mr_seg *seg1 = seg;
91 struct rpcrdma_mw *mw = seg1->rl_mw;
92 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
93 int len, pageoff, i, rc; 127 int len, pageoff, i, rc;
128 struct rpcrdma_mw *mw;
129
130 mw = seg1->rl_mw;
131 seg1->rl_mw = NULL;
132 if (!mw) {
133 mw = rpcrdma_get_mw(r_xprt);
134 if (!mw)
135 return -ENOMEM;
136 } else {
137 /* this is a retransmit; generate a fresh rkey */
138 rc = __fmr_unmap(mw);
139 if (rc)
140 return rc;
141 }
94 142
95 pageoff = offset_in_page(seg1->mr_offset); 143 pageoff = offset_in_page(seg1->mr_offset);
96 seg1->mr_offset -= pageoff; /* start of page */ 144 seg1->mr_offset -= pageoff; /* start of page */
@@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
100 nsegs = RPCRDMA_MAX_FMR_SGES; 148 nsegs = RPCRDMA_MAX_FMR_SGES;
101 for (i = 0; i < nsegs;) { 149 for (i = 0; i < nsegs;) {
102 rpcrdma_map_one(device, seg, direction); 150 rpcrdma_map_one(device, seg, direction);
103 physaddrs[i] = seg->mr_dma; 151 mw->r.fmr.physaddrs[i] = seg->mr_dma;
104 len += seg->mr_len; 152 len += seg->mr_len;
105 ++seg; 153 ++seg;
106 ++i; 154 ++i;
@@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
110 break; 158 break;
111 } 159 }
112 160
113 rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); 161 rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
162 i, seg1->mr_dma);
114 if (rc) 163 if (rc)
115 goto out_maperr; 164 goto out_maperr;
116 165
117 seg1->mr_rkey = mw->r.fmr->rkey; 166 seg1->rl_mw = mw;
167 seg1->mr_rkey = mw->r.fmr.fmr->rkey;
118 seg1->mr_base = seg1->mr_dma + pageoff; 168 seg1->mr_base = seg1->mr_dma + pageoff;
119 seg1->mr_nsegs = i; 169 seg1->mr_nsegs = i;
120 seg1->mr_len = len; 170 seg1->mr_len = len;
@@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
137{ 187{
138 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 188 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
139 struct rpcrdma_mr_seg *seg1 = seg; 189 struct rpcrdma_mr_seg *seg1 = seg;
140 struct ib_device *device; 190 struct rpcrdma_mw *mw = seg1->rl_mw;
141 int rc, nsegs = seg->mr_nsegs; 191 int rc, nsegs = seg->mr_nsegs;
142 LIST_HEAD(l);
143 192
144 list_add(&seg1->rl_mw->r.fmr->list, &l); 193 dprintk("RPC: %s: FMR %p\n", __func__, mw);
145 rc = ib_unmap_fmr(&l); 194
146 read_lock(&ia->ri_qplock); 195 seg1->rl_mw = NULL;
147 device = ia->ri_id->device;
148 while (seg1->mr_nsegs--) 196 while (seg1->mr_nsegs--)
149 rpcrdma_unmap_one(device, seg++); 197 rpcrdma_unmap_one(ia->ri_device, seg++);
150 read_unlock(&ia->ri_qplock); 198 rc = __fmr_unmap(mw);
151 if (rc) 199 if (rc)
152 goto out_err; 200 goto out_err;
201 rpcrdma_put_mw(r_xprt, mw);
153 return nsegs; 202 return nsegs;
154 203
155out_err: 204out_err:
205 /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
206 * will attempt to release it when the transport is destroyed.
207 */
156 dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); 208 dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
157 return nsegs; 209 return nsegs;
158} 210}
159 211
160/* After a disconnect, unmap all FMRs.
161 *
162 * This is invoked only in the transport connect worker in order
163 * to serialize with rpcrdma_register_fmr_external().
164 */
165static void
166fmr_op_reset(struct rpcrdma_xprt *r_xprt)
167{
168 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
169 struct rpcrdma_mw *r;
170 LIST_HEAD(list);
171 int rc;
172
173 list_for_each_entry(r, &buf->rb_all, mw_all)
174 list_add(&r->r.fmr->list, &list);
175
176 rc = ib_unmap_fmr(&list);
177 if (rc)
178 dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
179 __func__, rc);
180}
181
182static void 212static void
183fmr_op_destroy(struct rpcrdma_buffer *buf) 213fmr_op_destroy(struct rpcrdma_buffer *buf)
184{ 214{
@@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
188 while (!list_empty(&buf->rb_all)) { 218 while (!list_empty(&buf->rb_all)) {
189 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 219 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
190 list_del(&r->mw_all); 220 list_del(&r->mw_all);
191 rc = ib_dealloc_fmr(r->r.fmr); 221 kfree(r->r.fmr.physaddrs);
222
223 rc = ib_dealloc_fmr(r->r.fmr.fmr);
192 if (rc) 224 if (rc)
193 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", 225 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
194 __func__, rc); 226 __func__, rc);
227
195 kfree(r); 228 kfree(r);
196 } 229 }
197} 230}
@@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
202 .ro_open = fmr_op_open, 235 .ro_open = fmr_op_open,
203 .ro_maxpages = fmr_op_maxpages, 236 .ro_maxpages = fmr_op_maxpages,
204 .ro_init = fmr_op_init, 237 .ro_init = fmr_op_init,
205 .ro_reset = fmr_op_reset,
206 .ro_destroy = fmr_op_destroy, 238 .ro_destroy = fmr_op_destroy,
207 .ro_displayname = "fmr", 239 .ro_displayname = "fmr",
208}; 240};
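
With this change fmr_op_map() draws its MW from a pool on first transmission and, on a retransmit, unmaps the MR it already holds so the wire sees a fresh rkey. A standalone sketch of that get-or-refresh decision (toy types only, not the verbs API):

#include <stdio.h>
#include <stdlib.h>

struct mw_model { unsigned int rkey; };

static struct mw_model *get_mw(void)            /* rpcrdma_get_mw() stand-in */
{
	struct mw_model *mw = malloc(sizeof(*mw));

	if (mw)
		mw->rkey = 1000;
	return mw;
}

static int unmap_refresh(struct mw_model *mw)   /* __fmr_unmap() stand-in */
{
	mw->rkey++;         /* unmapping lets the next map use a fresh rkey */
	return 0;
}

/*
 * Models the top of the new fmr_op_map(): a first transmission takes an
 * MW from the pool; a retransmission reuses the RPC's MW but unmaps it
 * first so the remote never sees a stale rkey.
 */
static struct mw_model *map_segment(struct mw_model *prev)
{
	if (!prev)
		return get_mw();
	if (unmap_refresh(prev))
		return NULL;
	return prev;
}

int main(void)
{
	struct mw_model *mw = map_segment(NULL);    /* first send */

	if (!mw)
		return 1;
	printf("rkey=%u\n", mw->rkey);
	mw = map_segment(mw);                       /* retransmit */
	printf("rkey=%u\n", mw->rkey);
	free(mw);
	return 0;
}
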
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index dff0481dbcf8..04ea914201b2 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -11,12 +11,136 @@
11 * but most complex memory registration mode. 11 * but most complex memory registration mode.
12 */ 12 */
13 13
14/* Normal operation
15 *
16 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
17 * Work Request (frmr_op_map). When the RDMA operation is finished, this
18 * Memory Region is invalidated using a LOCAL_INV Work Request
19 * (frmr_op_unmap).
20 *
21 * Typically these Work Requests are not signaled, and neither are RDMA
22 * SEND Work Requests (with the exception of signaling occasionally to
23 * prevent provider work queue overflows). This greatly reduces HCA
24 * interrupt workload.
25 *
26 * As an optimization, frwr_op_unmap marks MRs INVALID before the
27 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
28 * rb_mws immediately so that no work (like managing a linked list
29 * under a spinlock) is needed in the completion upcall.
30 *
31 * But this means that frwr_op_map() can occasionally encounter an MR
32 * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
33 * ordering prevents a subsequent FAST_REG WR from executing against
34 * that MR while it is still being invalidated.
35 */
36
37/* Transport recovery
38 *
39 * ->op_map and the transport connect worker cannot run at the same
40 * time, but ->op_unmap can fire while the transport connect worker
41 * is running. Thus MR recovery is handled in ->op_map, to guarantee
42 * that recovered MRs are owned by a sending RPC, and not one where
43 * ->op_unmap could fire at the same time transport reconnect is
44 * being done.
45 *
46 * When the underlying transport disconnects, MRs are left in one of
47 * three states:
48 *
49 * INVALID: The MR was not in use before the QP entered ERROR state.
50 * (Or, the LOCAL_INV WR has not completed or flushed yet).
51 *
52 * STALE: The MR was being registered or unregistered when the QP
53 * entered ERROR state, and the pending WR was flushed.
54 *
55 * VALID: The MR was registered before the QP entered ERROR state.
56 *
57 * When frwr_op_map encounters STALE and VALID MRs, they are recovered
 58 * with ib_dereg_mr and then are re-initialized. Because MR recovery
59 * allocates fresh resources, it is deferred to a workqueue, and the
60 * recovered MRs are placed back on the rb_mws list when recovery is
61 * complete. frwr_op_map allocates another MR for the current RPC while
62 * the broken MR is reset.
63 *
64 * To ensure that frwr_op_map doesn't encounter an MR that is marked
65 * INVALID but that is about to be flushed due to a previous transport
66 * disconnect, the transport connect worker attempts to drain all
67 * pending send queue WRs before the transport is reconnected.
68 */
69
14#include "xprt_rdma.h" 70#include "xprt_rdma.h"
15 71
16#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 72#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
17# define RPCDBG_FACILITY RPCDBG_TRANS 73# define RPCDBG_FACILITY RPCDBG_TRANS
18#endif 74#endif
19 75
76static struct workqueue_struct *frwr_recovery_wq;
77
78#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM)
79
80int
81frwr_alloc_recovery_wq(void)
82{
83 frwr_recovery_wq = alloc_workqueue("frwr_recovery",
84 FRWR_RECOVERY_WQ_FLAGS, 0);
85 return !frwr_recovery_wq ? -ENOMEM : 0;
86}
87
88void
89frwr_destroy_recovery_wq(void)
90{
91 struct workqueue_struct *wq;
92
93 if (!frwr_recovery_wq)
94 return;
95
96 wq = frwr_recovery_wq;
97 frwr_recovery_wq = NULL;
98 destroy_workqueue(wq);
99}
100
101/* Deferred reset of a single FRMR. Generate a fresh rkey by
102 * replacing the MR.
103 *
104 * There's no recovery if this fails. The FRMR is abandoned, but
105 * remains in rb_all. It will be cleaned up when the transport is
106 * destroyed.
107 */
108static void
109__frwr_recovery_worker(struct work_struct *work)
110{
111 struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
112 r.frmr.fr_work);
113 struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt;
114 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
115 struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
116
117 if (ib_dereg_mr(r->r.frmr.fr_mr))
118 goto out_fail;
119
120 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth);
121 if (IS_ERR(r->r.frmr.fr_mr))
122 goto out_fail;
123
124 dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
125 r->r.frmr.fr_state = FRMR_IS_INVALID;
126 rpcrdma_put_mw(r_xprt, r);
127 return;
128
129out_fail:
130 pr_warn("RPC: %s: FRMR %p unrecovered\n",
131 __func__, r);
132}
133
134/* A broken MR was discovered in a context that can't sleep.
135 * Defer recovery to the recovery worker.
136 */
137static void
138__frwr_queue_recovery(struct rpcrdma_mw *r)
139{
140 INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker);
141 queue_work(frwr_recovery_wq, &r->r.frmr.fr_work);
142}
143
20static int 144static int
21__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, 145__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
22 unsigned int depth) 146 unsigned int depth)
@@ -128,8 +252,8 @@ frwr_sendcompletion(struct ib_wc *wc)
128 252
129 /* WARNING: Only wr_id and status are reliable at this point */ 253 /* WARNING: Only wr_id and status are reliable at this point */
130 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 254 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
131 dprintk("RPC: %s: frmr %p (stale), status %d\n", 255 pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n",
132 __func__, r, wc->status); 256 __func__, r, ib_wc_status_msg(wc->status), wc->status);
133 r->r.frmr.fr_state = FRMR_IS_STALE; 257 r->r.frmr.fr_state = FRMR_IS_STALE;
134} 258}
135 259
@@ -137,16 +261,19 @@ static int
137frwr_op_init(struct rpcrdma_xprt *r_xprt) 261frwr_op_init(struct rpcrdma_xprt *r_xprt)
138{ 262{
139 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 263 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
140 struct ib_device *device = r_xprt->rx_ia.ri_id->device; 264 struct ib_device *device = r_xprt->rx_ia.ri_device;
141 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; 265 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
142 struct ib_pd *pd = r_xprt->rx_ia.ri_pd; 266 struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
143 int i; 267 int i;
144 268
269 spin_lock_init(&buf->rb_mwlock);
145 INIT_LIST_HEAD(&buf->rb_mws); 270 INIT_LIST_HEAD(&buf->rb_mws);
146 INIT_LIST_HEAD(&buf->rb_all); 271 INIT_LIST_HEAD(&buf->rb_all);
147 272
148 i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; 273 i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1);
149 dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); 274 i += 2; /* head + tail */
275 i *= buf->rb_max_requests; /* one set for each RPC slot */
 276 dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);
150 277
151 while (i--) { 278 while (i--) {
152 struct rpcrdma_mw *r; 279 struct rpcrdma_mw *r;
@@ -165,6 +292,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
165 list_add(&r->mw_list, &buf->rb_mws); 292 list_add(&r->mw_list, &buf->rb_mws);
166 list_add(&r->mw_all, &buf->rb_all); 293 list_add(&r->mw_all, &buf->rb_all);
167 r->mw_sendcompletion = frwr_sendcompletion; 294 r->mw_sendcompletion = frwr_sendcompletion;
295 r->r.frmr.fr_xprt = r_xprt;
168 } 296 }
169 297
170 return 0; 298 return 0;
@@ -178,12 +306,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
178 int nsegs, bool writing) 306 int nsegs, bool writing)
179{ 307{
180 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 308 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
181 struct ib_device *device = ia->ri_id->device; 309 struct ib_device *device = ia->ri_device;
182 enum dma_data_direction direction = rpcrdma_data_dir(writing); 310 enum dma_data_direction direction = rpcrdma_data_dir(writing);
183 struct rpcrdma_mr_seg *seg1 = seg; 311 struct rpcrdma_mr_seg *seg1 = seg;
184 struct rpcrdma_mw *mw = seg1->rl_mw; 312 struct rpcrdma_mw *mw;
185 struct rpcrdma_frmr *frmr = &mw->r.frmr; 313 struct rpcrdma_frmr *frmr;
186 struct ib_mr *mr = frmr->fr_mr; 314 struct ib_mr *mr;
187 struct ib_send_wr fastreg_wr, *bad_wr; 315 struct ib_send_wr fastreg_wr, *bad_wr;
188 u8 key; 316 u8 key;
189 int len, pageoff; 317 int len, pageoff;
@@ -192,12 +320,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
192 u64 pa; 320 u64 pa;
193 int page_no; 321 int page_no;
194 322
323 mw = seg1->rl_mw;
324 seg1->rl_mw = NULL;
325 do {
326 if (mw)
327 __frwr_queue_recovery(mw);
328 mw = rpcrdma_get_mw(r_xprt);
329 if (!mw)
330 return -ENOMEM;
331 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
332 frmr = &mw->r.frmr;
333 frmr->fr_state = FRMR_IS_VALID;
334
195 pageoff = offset_in_page(seg1->mr_offset); 335 pageoff = offset_in_page(seg1->mr_offset);
196 seg1->mr_offset -= pageoff; /* start of page */ 336 seg1->mr_offset -= pageoff; /* start of page */
197 seg1->mr_len += pageoff; 337 seg1->mr_len += pageoff;
198 len = -pageoff; 338 len = -pageoff;
199 if (nsegs > ia->ri_max_frmr_depth) 339 if (nsegs > ia->ri_max_frmr_depth)
200 nsegs = ia->ri_max_frmr_depth; 340 nsegs = ia->ri_max_frmr_depth;
341
201 for (page_no = i = 0; i < nsegs;) { 342 for (page_no = i = 0; i < nsegs;) {
202 rpcrdma_map_one(device, seg, direction); 343 rpcrdma_map_one(device, seg, direction);
203 pa = seg->mr_dma; 344 pa = seg->mr_dma;
@@ -216,8 +357,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
216 dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", 357 dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
217 __func__, mw, i, len); 358 __func__, mw, i, len);
218 359
219 frmr->fr_state = FRMR_IS_VALID;
220
221 memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 360 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
222 fastreg_wr.wr_id = (unsigned long)(void *)mw; 361 fastreg_wr.wr_id = (unsigned long)(void *)mw;
223 fastreg_wr.opcode = IB_WR_FAST_REG_MR; 362 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
@@ -229,6 +368,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
229 fastreg_wr.wr.fast_reg.access_flags = writing ? 368 fastreg_wr.wr.fast_reg.access_flags = writing ?
230 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 369 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
231 IB_ACCESS_REMOTE_READ; 370 IB_ACCESS_REMOTE_READ;
371 mr = frmr->fr_mr;
232 key = (u8)(mr->rkey & 0x000000FF); 372 key = (u8)(mr->rkey & 0x000000FF);
233 ib_update_fast_reg_key(mr, ++key); 373 ib_update_fast_reg_key(mr, ++key);
234 fastreg_wr.wr.fast_reg.rkey = mr->rkey; 374 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
@@ -238,6 +378,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
238 if (rc) 378 if (rc)
239 goto out_senderr; 379 goto out_senderr;
240 380
381 seg1->rl_mw = mw;
241 seg1->mr_rkey = mr->rkey; 382 seg1->mr_rkey = mr->rkey;
242 seg1->mr_base = seg1->mr_dma + pageoff; 383 seg1->mr_base = seg1->mr_dma + pageoff;
243 seg1->mr_nsegs = i; 384 seg1->mr_nsegs = i;
@@ -246,10 +387,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
246 387
247out_senderr: 388out_senderr:
248 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); 389 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
249 ib_update_fast_reg_key(mr, --key);
250 frmr->fr_state = FRMR_IS_INVALID;
251 while (i--) 390 while (i--)
252 rpcrdma_unmap_one(device, --seg); 391 rpcrdma_unmap_one(device, --seg);
392 __frwr_queue_recovery(mw);
253 return rc; 393 return rc;
254} 394}
255 395
@@ -261,78 +401,46 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
261{ 401{
262 struct rpcrdma_mr_seg *seg1 = seg; 402 struct rpcrdma_mr_seg *seg1 = seg;
263 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 403 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
404 struct rpcrdma_mw *mw = seg1->rl_mw;
264 struct ib_send_wr invalidate_wr, *bad_wr; 405 struct ib_send_wr invalidate_wr, *bad_wr;
265 int rc, nsegs = seg->mr_nsegs; 406 int rc, nsegs = seg->mr_nsegs;
266 struct ib_device *device;
267 407
268 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; 408 dprintk("RPC: %s: FRMR %p\n", __func__, mw);
409
410 seg1->rl_mw = NULL;
411 mw->r.frmr.fr_state = FRMR_IS_INVALID;
269 412
270 memset(&invalidate_wr, 0, sizeof(invalidate_wr)); 413 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
271 invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; 414 invalidate_wr.wr_id = (unsigned long)(void *)mw;
272 invalidate_wr.opcode = IB_WR_LOCAL_INV; 415 invalidate_wr.opcode = IB_WR_LOCAL_INV;
273 invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; 416 invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
274 DECR_CQCOUNT(&r_xprt->rx_ep); 417 DECR_CQCOUNT(&r_xprt->rx_ep);
275 418
276 read_lock(&ia->ri_qplock);
277 device = ia->ri_id->device;
278 while (seg1->mr_nsegs--) 419 while (seg1->mr_nsegs--)
279 rpcrdma_unmap_one(device, seg++); 420 rpcrdma_unmap_one(ia->ri_device, seg++);
421 read_lock(&ia->ri_qplock);
280 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 422 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
281 read_unlock(&ia->ri_qplock); 423 read_unlock(&ia->ri_qplock);
282 if (rc) 424 if (rc)
283 goto out_err; 425 goto out_err;
426
427 rpcrdma_put_mw(r_xprt, mw);
284 return nsegs; 428 return nsegs;
285 429
286out_err: 430out_err:
287 /* Force rpcrdma_buffer_get() to retry */
288 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
289 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); 431 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
432 __frwr_queue_recovery(mw);
290 return nsegs; 433 return nsegs;
291} 434}
292 435
293/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
294 * an unusable state. Find FRMRs in this state and dereg / reg
295 * each. FRMRs that are VALID and attached to an rpcrdma_req are
296 * also torn down.
297 *
298 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
299 *
300 * This is invoked only in the transport connect worker in order
301 * to serialize with rpcrdma_register_frmr_external().
302 */
303static void
304frwr_op_reset(struct rpcrdma_xprt *r_xprt)
305{
306 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
307 struct ib_device *device = r_xprt->rx_ia.ri_id->device;
308 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
309 struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
310 struct rpcrdma_mw *r;
311 int rc;
312
313 list_for_each_entry(r, &buf->rb_all, mw_all) {
314 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
315 continue;
316
317 __frwr_release(r);
318 rc = __frwr_init(r, pd, device, depth);
319 if (rc) {
320 dprintk("RPC: %s: mw %p left %s\n",
321 __func__, r,
322 (r->r.frmr.fr_state == FRMR_IS_STALE ?
323 "stale" : "valid"));
324 continue;
325 }
326
327 r->r.frmr.fr_state = FRMR_IS_INVALID;
328 }
329}
330
331static void 436static void
332frwr_op_destroy(struct rpcrdma_buffer *buf) 437frwr_op_destroy(struct rpcrdma_buffer *buf)
333{ 438{
334 struct rpcrdma_mw *r; 439 struct rpcrdma_mw *r;
335 440
441 /* Ensure stale MWs for "buf" are no longer in flight */
442 flush_workqueue(frwr_recovery_wq);
443
336 while (!list_empty(&buf->rb_all)) { 444 while (!list_empty(&buf->rb_all)) {
337 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 445 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
338 list_del(&r->mw_all); 446 list_del(&r->mw_all);
@@ -347,7 +455,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
347 .ro_open = frwr_op_open, 455 .ro_open = frwr_op_open,
348 .ro_maxpages = frwr_op_maxpages, 456 .ro_maxpages = frwr_op_maxpages,
349 .ro_init = frwr_op_init, 457 .ro_init = frwr_op_init,
350 .ro_reset = frwr_op_reset,
351 .ro_destroy = frwr_op_destroy, 458 .ro_destroy = frwr_op_destroy,
352 .ro_displayname = "frwr", 459 .ro_displayname = "frwr",
353}; 460};
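
The recovery scheme above hinges on the three MR states: frwr_op_map() keeps drawing MRs from the pool, deferring any that are not INVALID to the recovery workqueue, until a usable one turns up. A compilable model of that loop (names ours):

#include <stdio.h>

enum frmr_state { FRMR_INVALID, FRMR_VALID, FRMR_STALE };

static void queue_recovery(int idx, enum frmr_state s)
{
	/* stands in for __frwr_queue_recovery(): dereg + re-alloc later,
	 * from a workqueue, because recovery sleeps */
	printf("MR %d deferred (%s)\n", idx,
	       s == FRMR_STALE ? "stale" : "valid");
}

/*
 * Models the do/while loop in the new frwr_op_map(): keep drawing MRs,
 * deferring any that are not INVALID, until a usable one is found.
 */
static int pick_usable(const enum frmr_state *pool, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (pool[i] == FRMR_INVALID)
			return i;
		queue_recovery(i, pool[i]);
	}
	return -1;
}

int main(void)
{
	enum frmr_state pool[] = { FRMR_STALE, FRMR_VALID, FRMR_INVALID };

	printf("using MR %d\n", pick_usable(pool, 3));
	return 0;
}
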
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
new file mode 100644
index 000000000000..560712bd9fa2
--- /dev/null
+++ b/net/sunrpc/xprtrdma/module.c
@@ -0,0 +1,46 @@
1/*
2 * Copyright (c) 2015 Oracle. All rights reserved.
3 */
4
5/* rpcrdma.ko module initialization
6 */
7
8#include <linux/module.h>
9#include <linux/init.h>
10#include <linux/sunrpc/svc_rdma.h>
11#include "xprt_rdma.h"
12
13#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
14# define RPCDBG_FACILITY RPCDBG_TRANS
15#endif
16
17MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
18MODULE_DESCRIPTION("RPC/RDMA Transport");
19MODULE_LICENSE("Dual BSD/GPL");
20MODULE_ALIAS("svcrdma");
21MODULE_ALIAS("xprtrdma");
22
23static void __exit rpc_rdma_cleanup(void)
24{
25 xprt_rdma_cleanup();
26 svc_rdma_cleanup();
27}
28
29static int __init rpc_rdma_init(void)
30{
31 int rc;
32
33 rc = svc_rdma_init();
34 if (rc)
35 goto out;
36
37 rc = xprt_rdma_init();
38 if (rc)
39 svc_rdma_cleanup();
40
41out:
42 return rc;
43}
44
45module_init(rpc_rdma_init);
46module_exit(rpc_rdma_cleanup);
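
rpc_rdma_init() above follows the usual combined-module init pattern: if the second sub-initialization fails, the first is unwound so the module never loads half-initialized. Modeled standalone (stub names ours):

#include <stdio.h>

static int svc_ready;

static int svc_init_stub(void)     { svc_ready = 1; return 0; }
static void svc_cleanup_stub(void) { svc_ready = 0; }
static int xprt_init_stub(void)    { return -1; }  /* force the failure path */

/*
 * Models rpc_rdma_init(): a failure in the second sub-init unwinds the
 * first, leaving no partial state behind.
 */
static int combined_init(void)
{
	int rc = svc_init_stub();

	if (rc)
		return rc;
	rc = xprt_init_stub();
	if (rc)
		svc_cleanup_stub();
	return rc;
}

int main(void)
{
	printf("rc=%d, svc_ready=%d\n", combined_init(), svc_ready);
	return 0;
}
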
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index ba518af16787..41985d07fdb7 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
50{ 50{
51 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 51 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
52 52
53 rpcrdma_map_one(ia->ri_id->device, seg, 53 rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
54 rpcrdma_data_dir(writing));
55 seg->mr_rkey = ia->ri_bind_mem->rkey; 54 seg->mr_rkey = ia->ri_bind_mem->rkey;
56 seg->mr_base = seg->mr_dma; 55 seg->mr_base = seg->mr_dma;
57 seg->mr_nsegs = 1; 56 seg->mr_nsegs = 1;
@@ -65,19 +64,11 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
65{ 64{
66 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 65 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
67 66
68 read_lock(&ia->ri_qplock); 67 rpcrdma_unmap_one(ia->ri_device, seg);
69 rpcrdma_unmap_one(ia->ri_id->device, seg);
70 read_unlock(&ia->ri_qplock);
71
72 return 1; 68 return 1;
73} 69}
74 70
75static void 71static void
76physical_op_reset(struct rpcrdma_xprt *r_xprt)
77{
78}
79
80static void
81physical_op_destroy(struct rpcrdma_buffer *buf) 72physical_op_destroy(struct rpcrdma_buffer *buf)
82{ 73{
83} 74}
@@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
88 .ro_open = physical_op_open, 79 .ro_open = physical_op_open,
89 .ro_maxpages = physical_op_maxpages, 80 .ro_maxpages = physical_op_maxpages,
90 .ro_init = physical_op_init, 81 .ro_init = physical_op_init,
91 .ro_reset = physical_op_reset,
92 .ro_destroy = physical_op_destroy, 82 .ro_destroy = physical_op_destroy,
93 .ro_displayname = "physical", 83 .ro_displayname = "physical",
94}; 84};
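
With ro_reset now gone from all three tables (fmr, frwr, physical), no registration strategy needs an empty stub for it. The ops-table idiom itself, reduced to a compilable toy (struct and field names ours):

#include <stdio.h>

struct memreg_ops_model {
	int (*map)(void);
	void (*destroy)(void);
	const char *displayname;
};

static int fmr_map_stub(void)       { return 0; }
static void fmr_destroy_stub(void)  { }
static int frwr_map_stub(void)      { return 0; }
static void frwr_destroy_stub(void) { }

static const struct memreg_ops_model fmr_ops_model = {
	fmr_map_stub, fmr_destroy_stub, "fmr"
};
static const struct memreg_ops_model frwr_ops_model = {
	frwr_map_stub, frwr_destroy_stub, "frwr"
};

int main(void)
{
	/* chosen once at connect time, like ri_ops in the patch */
	const struct memreg_ops_model *ops = &frwr_ops_model;

	printf("strategy %s, map rc=%d\n", ops->displayname, ops->map());
	ops = &fmr_ops_model;
	printf("strategy %s, map rc=%d\n", ops->displayname, ops->map());
	return 0;
}
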
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2c53ea9e1b83..84ea37daef36 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
284 return (unsigned char *)iptr - (unsigned char *)headerp; 284 return (unsigned char *)iptr - (unsigned char *)headerp;
285 285
286out: 286out:
287 if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
288 return n;
289
290 for (pos = 0; nchunks--;) 287 for (pos = 0; nchunks--;)
291 pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, 288 pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
292 &req->rl_segments[pos]); 289 &req->rl_segments[pos]);
@@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
732 struct rpcrdma_msg *headerp; 729 struct rpcrdma_msg *headerp;
733 struct rpcrdma_req *req; 730 struct rpcrdma_req *req;
734 struct rpc_rqst *rqst; 731 struct rpc_rqst *rqst;
735 struct rpc_xprt *xprt = rep->rr_xprt; 732 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
736 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 733 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
737 __be32 *iptr; 734 __be32 *iptr;
738 int rdmalen, status; 735 int rdmalen, status;
739 unsigned long cwnd; 736 unsigned long cwnd;
@@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
770 rep->rr_len); 767 rep->rr_len);
771repost: 768repost:
772 r_xprt->rx_stats.bad_reply_count++; 769 r_xprt->rx_stats.bad_reply_count++;
773 rep->rr_func = rpcrdma_reply_handler;
774 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) 770 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
775 rpcrdma_recv_buffer_put(rep); 771 rpcrdma_recv_buffer_put(rep);
776 772
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c1b6270262c2..2cd252f023a5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -38,8 +38,7 @@
38 * 38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com> 39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */ 40 */
41#include <linux/module.h> 41
42#include <linux/init.h>
43#include <linux/slab.h> 42#include <linux/slab.h>
44#include <linux/fs.h> 43#include <linux/fs.h>
45#include <linux/sysctl.h> 44#include <linux/sysctl.h>
@@ -295,8 +294,3 @@ int svc_rdma_init(void)
295 destroy_workqueue(svc_rdma_wq); 294 destroy_workqueue(svc_rdma_wq);
296 return -ENOMEM; 295 return -ENOMEM;
297} 296}
298MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
299MODULE_DESCRIPTION("SVC RDMA Transport");
300MODULE_LICENSE("Dual BSD/GPL");
301module_init(svc_rdma_init);
302module_exit(svc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index b681855cf970..e2fca7617242 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -50,12 +50,12 @@
50/* 50/*
51 * Decodes a read chunk list. The expected format is as follows: 51 * Decodes a read chunk list. The expected format is as follows:
 52 * discrim : xdr_one 52 * discrim : xdr_one
53 * position : u32 offset into XDR stream 53 * position : __be32 offset into XDR stream
54 * handle : u32 RKEY 54 * handle : __be32 RKEY
55 * . . . 55 * . . .
56 * end-of-list: xdr_zero 56 * end-of-list: xdr_zero
57 */ 57 */
58static u32 *decode_read_list(u32 *va, u32 *vaend) 58static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
59{ 59{
60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; 60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61 61
@@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
67 } 67 }
68 ch++; 68 ch++;
69 } 69 }
70 return (u32 *)&ch->rc_position; 70 return &ch->rc_position;
71} 71}
72 72
73/* 73/*
74 * Decodes a write chunk list. The expected format is as follows: 74 * Decodes a write chunk list. The expected format is as follows:
 75 * discrim : xdr_one 75 * discrim : xdr_one
76 * nchunks : <count> 76 * nchunks : <count>
77 * handle : u32 RKEY ---+ 77 * handle : __be32 RKEY ---+
78 * length : u32 <len of segment> | 78 * length : __be32 <len of segment> |
 79 * offset : remote va + <count> 79 * offset : remote va + <count>
80 * . . . | 80 * . . . |
81 * ---+ 81 * ---+
82 */ 82 */
83static u32 *decode_write_list(u32 *va, u32 *vaend) 83static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
84{ 84{
85 unsigned long start, end; 85 unsigned long start, end;
86 int nchunks; 86 int nchunks;
@@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
90 90
91 /* Check for not write-array */ 91 /* Check for not write-array */
92 if (ary->wc_discrim == xdr_zero) 92 if (ary->wc_discrim == xdr_zero)
93 return (u32 *)&ary->wc_nchunks; 93 return &ary->wc_nchunks;
94 94
95 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 95 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
96 (unsigned long)vaend) { 96 (unsigned long)vaend) {
97 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 97 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
98 return NULL; 98 return NULL;
99 } 99 }
100 nchunks = ntohl(ary->wc_nchunks); 100 nchunks = be32_to_cpu(ary->wc_nchunks);
101 101
102 start = (unsigned long)&ary->wc_array[0]; 102 start = (unsigned long)&ary->wc_array[0];
103 end = (unsigned long)vaend; 103 end = (unsigned long)vaend;
@@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
112 * rs_length is the 2nd 4B field in wc_target and taking its 112 * rs_length is the 2nd 4B field in wc_target and taking its
113 * address skips the list terminator 113 * address skips the list terminator
114 */ 114 */
115 return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length; 115 return &ary->wc_array[nchunks].wc_target.rs_length;
116} 116}
117 117
118static u32 *decode_reply_array(u32 *va, u32 *vaend) 118static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
119{ 119{
120 unsigned long start, end; 120 unsigned long start, end;
121 int nchunks; 121 int nchunks;
@@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
124 124
125 /* Check for no reply-array */ 125 /* Check for no reply-array */
126 if (ary->wc_discrim == xdr_zero) 126 if (ary->wc_discrim == xdr_zero)
127 return (u32 *)&ary->wc_nchunks; 127 return &ary->wc_nchunks;
128 128
129 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 129 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
130 (unsigned long)vaend) { 130 (unsigned long)vaend) {
131 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 131 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
132 return NULL; 132 return NULL;
133 } 133 }
134 nchunks = ntohl(ary->wc_nchunks); 134 nchunks = be32_to_cpu(ary->wc_nchunks);
135 135
136 start = (unsigned long)&ary->wc_array[0]; 136 start = (unsigned long)&ary->wc_array[0];
137 end = (unsigned long)vaend; 137 end = (unsigned long)vaend;
@@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
142 ary, nchunks, vaend); 142 ary, nchunks, vaend);
143 return NULL; 143 return NULL;
144 } 144 }
145 return (u32 *)&ary->wc_array[nchunks]; 145 return (__be32 *)&ary->wc_array[nchunks];
146} 146}
147 147
148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, 148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
149 struct svc_rqst *rqstp) 149 struct svc_rqst *rqstp)
150{ 150{
151 struct rpcrdma_msg *rmsgp = NULL; 151 struct rpcrdma_msg *rmsgp = NULL;
152 u32 *va; 152 __be32 *va, *vaend;
153 u32 *vaend;
154 u32 hdr_len; 153 u32 hdr_len;
155 154
156 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 155 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
@@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
162 return -EINVAL; 161 return -EINVAL;
163 } 162 }
164 163
165 /* Decode the header */ 164 if (rmsgp->rm_vers != rpcrdma_version)
166 rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
167 rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
168 rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
169 rmsgp->rm_type = ntohl(rmsgp->rm_type);
170
171 if (rmsgp->rm_vers != RPCRDMA_VERSION)
172 return -ENOSYS; 165 return -ENOSYS;
173 166
174 /* Pull in the extra for the padded case and bump our pointer */ 167 /* Pull in the extra for the padded case and bump our pointer */
175 if (rmsgp->rm_type == RDMA_MSGP) { 168 if (rmsgp->rm_type == rdma_msgp) {
176 int hdrlen; 169 int hdrlen;
170
177 rmsgp->rm_body.rm_padded.rm_align = 171 rmsgp->rm_body.rm_padded.rm_align =
178 ntohl(rmsgp->rm_body.rm_padded.rm_align); 172 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
179 rmsgp->rm_body.rm_padded.rm_thresh = 173 rmsgp->rm_body.rm_padded.rm_thresh =
180 ntohl(rmsgp->rm_body.rm_padded.rm_thresh); 174 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
181 175
182 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 176 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
183 rqstp->rq_arg.head[0].iov_base = va; 177 rqstp->rq_arg.head[0].iov_base = va;
@@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
192 * chunk list and a reply chunk list. 186 * chunk list and a reply chunk list.
193 */ 187 */
194 va = &rmsgp->rm_body.rm_chunks[0]; 188 va = &rmsgp->rm_body.rm_chunks[0];
195 vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); 189 vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
196 va = decode_read_list(va, vaend); 190 va = decode_read_list(va, vaend);
197 if (!va) 191 if (!va)
198 return -EINVAL; 192 return -EINVAL;
@@ -211,76 +205,20 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
211 return hdr_len; 205 return hdr_len;
212} 206}
213 207
214int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
215{
216 struct rpcrdma_msg *rmsgp = NULL;
217 struct rpcrdma_read_chunk *ch;
218 struct rpcrdma_write_array *ary;
219 u32 *va;
220 u32 hdrlen;
221
222 dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
223 rqstp);
224 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
225
226 /* Pull in the extra for the padded case and bump our pointer */
227 if (rmsgp->rm_type == RDMA_MSGP) {
228 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
229 rqstp->rq_arg.head[0].iov_base = va;
230 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
231 rqstp->rq_arg.head[0].iov_len -= hdrlen;
232 return hdrlen;
233 }
234
235 /*
236 * Skip all chunks to find RPC msg. These were previously processed
237 */
238 va = &rmsgp->rm_body.rm_chunks[0];
239
240 /* Skip read-list */
241 for (ch = (struct rpcrdma_read_chunk *)va;
242 ch->rc_discrim != xdr_zero; ch++);
243 va = (u32 *)&ch->rc_position;
244
245 /* Skip write-list */
246 ary = (struct rpcrdma_write_array *)va;
247 if (ary->wc_discrim == xdr_zero)
248 va = (u32 *)&ary->wc_nchunks;
249 else
250 /*
251 * rs_length is the 2nd 4B field in wc_target and taking its
252 * address skips the list terminator
253 */
254 va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
255
256 /* Skip reply-array */
257 ary = (struct rpcrdma_write_array *)va;
258 if (ary->wc_discrim == xdr_zero)
259 va = (u32 *)&ary->wc_nchunks;
260 else
261 va = (u32 *)&ary->wc_array[ary->wc_nchunks];
262
263 rqstp->rq_arg.head[0].iov_base = va;
264 hdrlen = (unsigned long)va - (unsigned long)rmsgp;
265 rqstp->rq_arg.head[0].iov_len -= hdrlen;
266
267 return hdrlen;
268}
269
270int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, 208int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
271 struct rpcrdma_msg *rmsgp, 209 struct rpcrdma_msg *rmsgp,
272 enum rpcrdma_errcode err, u32 *va) 210 enum rpcrdma_errcode err, __be32 *va)
273{ 211{
274 u32 *startp = va; 212 __be32 *startp = va;
275 213
276 *va++ = htonl(rmsgp->rm_xid); 214 *va++ = rmsgp->rm_xid;
277 *va++ = htonl(rmsgp->rm_vers); 215 *va++ = rmsgp->rm_vers;
278 *va++ = htonl(xprt->sc_max_requests); 216 *va++ = cpu_to_be32(xprt->sc_max_requests);
279 *va++ = htonl(RDMA_ERROR); 217 *va++ = rdma_error;
280 *va++ = htonl(err); 218 *va++ = cpu_to_be32(err);
281 if (err == ERR_VERS) { 219 if (err == ERR_VERS) {
282 *va++ = htonl(RPCRDMA_VERSION); 220 *va++ = rpcrdma_version;
283 *va++ = htonl(RPCRDMA_VERSION); 221 *va++ = rpcrdma_version;
284 } 222 }
285 223
286 return (int)((unsigned long)va - (unsigned long)startp); 224 return (int)((unsigned long)va - (unsigned long)startp);
@@ -297,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
297 &rmsgp->rm_body.rm_chunks[1]; 235 &rmsgp->rm_body.rm_chunks[1];
298 if (wr_ary->wc_discrim) 236 if (wr_ary->wc_discrim)
299 wr_ary = (struct rpcrdma_write_array *) 237 wr_ary = (struct rpcrdma_write_array *)
300 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. 238 &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
301 wc_target.rs_length; 239 wc_target.rs_length;
302 else 240 else
303 wr_ary = (struct rpcrdma_write_array *) 241 wr_ary = (struct rpcrdma_write_array *)
@@ -306,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
306 /* skip reply array */ 244 /* skip reply array */
307 if (wr_ary->wc_discrim) 245 if (wr_ary->wc_discrim)
308 wr_ary = (struct rpcrdma_write_array *) 246 wr_ary = (struct rpcrdma_write_array *)
309 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; 247 &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
310 else 248 else
311 wr_ary = (struct rpcrdma_write_array *) 249 wr_ary = (struct rpcrdma_write_array *)
312 &wr_ary->wc_nchunks; 250 &wr_ary->wc_nchunks;
@@ -325,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
325 ary = (struct rpcrdma_write_array *) 263 ary = (struct rpcrdma_write_array *)
326 &rmsgp->rm_body.rm_chunks[1]; 264 &rmsgp->rm_body.rm_chunks[1];
327 ary->wc_discrim = xdr_one; 265 ary->wc_discrim = xdr_one;
328 ary->wc_nchunks = htonl(chunks); 266 ary->wc_nchunks = cpu_to_be32(chunks);
329 267
330 /* write-list terminator */ 268 /* write-list terminator */
331 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; 269 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
@@ -338,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
338 int chunks) 276 int chunks)
339{ 277{
340 ary->wc_discrim = xdr_one; 278 ary->wc_discrim = xdr_one;
341 ary->wc_nchunks = htonl(chunks); 279 ary->wc_nchunks = cpu_to_be32(chunks);
342} 280}
343 281
344void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, 282void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
@@ -350,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
350 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; 288 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
351 seg->rs_handle = rs_handle; 289 seg->rs_handle = rs_handle;
352 seg->rs_offset = rs_offset; 290 seg->rs_offset = rs_offset;
353 seg->rs_length = htonl(write_len); 291 seg->rs_length = cpu_to_be32(write_len);
354} 292}
355 293
356void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, 294void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
@@ -358,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
358 struct rpcrdma_msg *rdma_resp, 296 struct rpcrdma_msg *rdma_resp,
359 enum rpcrdma_proc rdma_type) 297 enum rpcrdma_proc rdma_type)
360{ 298{
361 rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); 299 rdma_resp->rm_xid = rdma_argp->rm_xid;
362 rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); 300 rdma_resp->rm_vers = rdma_argp->rm_vers;
363 rdma_resp->rm_credit = htonl(xprt->sc_max_requests); 301 rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
364 rdma_resp->rm_type = htonl(rdma_type); 302 rdma_resp->rm_type = cpu_to_be32(rdma_type);
365 303
366 /* Encode <nul> chunks lists */ 304 /* Encode <nul> chunks lists */
367 rdma_resp->rm_body.rm_chunks[0] = xdr_zero; 305 rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
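
The marshalling changes above stop byte-swapping header fields in place: chunk-list words keep their wire (big-endian) representation in the buffer and are converted with be32_to_cpu()/cpu_to_be32() at each use, which sparse can type-check. A userspace sketch of the same discipline (be32 here is a plain typedef, not the kernel's annotated __be32):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t be32;          /* plain alias; the kernel's __be32 also
				 * carries a sparse annotation */

static be32 to_be32(uint32_t v)     { return htonl(v); }
static uint32_t from_be32(be32 v)   { return ntohl(v); }

int main(void)
{
	/*
	 * Old scheme: words were swapped in place, so a field's byte
	 * order depended on how far decoding had progressed.
	 * New scheme: the buffer keeps wire order; convert at each use.
	 */
	be32 wire_nchunks = to_be32(3);
	uint32_t nchunks = from_be32(wire_nchunks);  /* be32_to_cpu() */

	printf("nchunks on host: %u\n", nchunks);
	return 0;
}
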
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f9f13a32ddb8..2e1348bde325 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
85 85
86 /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ 86 /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
87 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 87 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
88 if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) 88 if (rmsgp->rm_type == rdma_nomsg)
89 rqstp->rq_arg.pages = &rqstp->rq_pages[0]; 89 rqstp->rq_arg.pages = &rqstp->rq_pages[0];
90 else 90 else
91 rqstp->rq_arg.pages = &rqstp->rq_pages[1]; 91 rqstp->rq_arg.pages = &rqstp->rq_pages[1];
@@ -117,8 +117,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
117 117
118static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) 118static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
119{ 119{
120 if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == 120 if (!rdma_cap_read_multi_sge(xprt->sc_cm_id->device,
121 RDMA_TRANSPORT_IWARP) 121 xprt->sc_cm_id->port_num))
122 return 1; 122 return 1;
123 else 123 else
124 return min_t(int, sge_count, xprt->sc_max_sge); 124 return min_t(int, sge_count, xprt->sc_max_sge);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7de33d1af9b6..d25cd430f9ff 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
240 u32 xdr_off; 240 u32 xdr_off;
241 int chunk_off; 241 int chunk_off;
242 int chunk_no; 242 int chunk_no;
243 int nchunks;
243 struct rpcrdma_write_array *arg_ary; 244 struct rpcrdma_write_array *arg_ary;
244 struct rpcrdma_write_array *res_ary; 245 struct rpcrdma_write_array *res_ary;
245 int ret; 246 int ret;
@@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
251 &rdma_resp->rm_body.rm_chunks[1]; 252 &rdma_resp->rm_body.rm_chunks[1];
252 253
253 /* Write chunks start at the pagelist */ 254 /* Write chunks start at the pagelist */
255 nchunks = be32_to_cpu(arg_ary->wc_nchunks);
254 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 256 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
255 xfer_len && chunk_no < arg_ary->wc_nchunks; 257 xfer_len && chunk_no < nchunks;
256 chunk_no++) { 258 chunk_no++) {
257 struct rpcrdma_segment *arg_ch; 259 struct rpcrdma_segment *arg_ch;
258 u64 rs_offset; 260 u64 rs_offset;
259 261
260 arg_ch = &arg_ary->wc_array[chunk_no].wc_target; 262 arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
261 write_len = min(xfer_len, ntohl(arg_ch->rs_length)); 263 write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
262 264
263 /* Prepare the response chunk given the length actually 265 /* Prepare the response chunk given the length actually
264 * written */ 266 * written */
@@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
270 chunk_off = 0; 272 chunk_off = 0;
271 while (write_len) { 273 while (write_len) {
272 ret = send_write(xprt, rqstp, 274 ret = send_write(xprt, rqstp,
273 ntohl(arg_ch->rs_handle), 275 be32_to_cpu(arg_ch->rs_handle),
274 rs_offset + chunk_off, 276 rs_offset + chunk_off,
275 xdr_off, 277 xdr_off,
276 write_len, 278 write_len,
@@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
318 &rdma_resp->rm_body.rm_chunks[2]; 320 &rdma_resp->rm_body.rm_chunks[2];
319 321
320 /* xdr offset starts at RPC message */ 322 /* xdr offset starts at RPC message */
321 nchunks = ntohl(arg_ary->wc_nchunks); 323 nchunks = be32_to_cpu(arg_ary->wc_nchunks);
322 for (xdr_off = 0, chunk_no = 0; 324 for (xdr_off = 0, chunk_no = 0;
323 xfer_len && chunk_no < nchunks; 325 xfer_len && chunk_no < nchunks;
324 chunk_no++) { 326 chunk_no++) {
325 u64 rs_offset; 327 u64 rs_offset;
326 ch = &arg_ary->wc_array[chunk_no].wc_target; 328 ch = &arg_ary->wc_array[chunk_no].wc_target;
327 write_len = min(xfer_len, htonl(ch->rs_length)); 329 write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
328 330
329 /* Prepare the reply chunk given the length actually 331 /* Prepare the reply chunk given the length actually
330 * written */ 332 * written */
@@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
335 chunk_off = 0; 337 chunk_off = 0;
336 while (write_len) { 338 while (write_len) {
337 ret = send_write(xprt, rqstp, 339 ret = send_write(xprt, rqstp,
338 ntohl(ch->rs_handle), 340 be32_to_cpu(ch->rs_handle),
339 rs_offset + chunk_off, 341 rs_offset + chunk_off,
340 xdr_off, 342 xdr_off,
341 write_len, 343 write_len,
@@ -515,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
515 inline_bytes = rqstp->rq_res.len; 517 inline_bytes = rqstp->rq_res.len;
516 518
517 /* Create the RDMA response header */ 519 /* Create the RDMA response header */
518 res_page = svc_rdma_get_page(); 520 res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
519 rdma_resp = page_address(res_page); 521 rdma_resp = page_address(res_page);
520 reply_ary = svc_rdma_get_reply_array(rdma_argp); 522 reply_ary = svc_rdma_get_reply_array(rdma_argp);
521 if (reply_ary) 523 if (reply_ary)
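
svc_rdma_sendto() here, and the context/map allocators below, replace open-coded allocate-sleep-retry loops with GFP_KERNEL | __GFP_NOFAIL, letting the page allocator do the blocking. The deleted loop pattern, shown as a userspace analogy only (kernel GFP semantics do not exist here):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Userspace analogy: in the kernel, __GFP_NOFAIL makes the allocator
 * itself block until the request can be satisfied, which is exactly
 * what loops like the removed svc_rdma_get_page() implemented by hand.
 */
static void *alloc_nofail(size_t n)
{
	void *p;

	while ((p = malloc(n)) == NULL)
		sleep(1);               /* "retrying in 1s" */
	return p;
}

int main(void)
{
	void *page = alloc_nofail(4096);

	printf("got %p\n", page);
	free(page);
	return 0;
}
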
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f609c1c2d38d..6b36279e4288 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
91 .xcl_name = "rdma", 91 .xcl_name = "rdma",
92 .xcl_owner = THIS_MODULE, 92 .xcl_owner = THIS_MODULE,
93 .xcl_ops = &svc_rdma_ops, 93 .xcl_ops = &svc_rdma_ops,
94 .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, 94 .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
95 .xcl_ident = XPRT_TRANSPORT_RDMA, 95 .xcl_ident = XPRT_TRANSPORT_RDMA,
96}; 96};
97 97
@@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
99{ 99{
100 struct svc_rdma_op_ctxt *ctxt; 100 struct svc_rdma_op_ctxt *ctxt;
101 101
102 while (1) { 102 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
103 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL); 103 GFP_KERNEL | __GFP_NOFAIL);
104 if (ctxt)
105 break;
106 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
107 }
108 ctxt->xprt = xprt; 104 ctxt->xprt = xprt;
109 INIT_LIST_HEAD(&ctxt->dto_q); 105 INIT_LIST_HEAD(&ctxt->dto_q);
110 ctxt->count = 0; 106 ctxt->count = 0;
@@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
156struct svc_rdma_req_map *svc_rdma_get_req_map(void) 152struct svc_rdma_req_map *svc_rdma_get_req_map(void)
157{ 153{
158 struct svc_rdma_req_map *map; 154 struct svc_rdma_req_map *map;
159 while (1) { 155 map = kmem_cache_alloc(svc_rdma_map_cachep,
160 map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL); 156 GFP_KERNEL | __GFP_NOFAIL);
161 if (map)
162 break;
163 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
164 }
165 map->count = 0; 157 map->count = 0;
166 return map; 158 return map;
167} 159}
@@ -175,8 +167,8 @@ void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
175static void cq_event_handler(struct ib_event *event, void *context) 167static void cq_event_handler(struct ib_event *event, void *context)
176{ 168{
177 struct svc_xprt *xprt = context; 169 struct svc_xprt *xprt = context;
178 dprintk("svcrdma: received CQ event id=%d, context=%p\n", 170 dprintk("svcrdma: received CQ event %s (%d), context=%p\n",
179 event->event, context); 171 ib_event_msg(event->event), event->event, context);
180 set_bit(XPT_CLOSE, &xprt->xpt_flags); 172 set_bit(XPT_CLOSE, &xprt->xpt_flags);
181} 173}
182 174
@@ -191,8 +183,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
191 case IB_EVENT_COMM_EST: 183 case IB_EVENT_COMM_EST:
192 case IB_EVENT_SQ_DRAINED: 184 case IB_EVENT_SQ_DRAINED:
193 case IB_EVENT_QP_LAST_WQE_REACHED: 185 case IB_EVENT_QP_LAST_WQE_REACHED:
194 dprintk("svcrdma: QP event %d received for QP=%p\n", 186 dprintk("svcrdma: QP event %s (%d) received for QP=%p\n",
195 event->event, event->element.qp); 187 ib_event_msg(event->event), event->event,
188 event->element.qp);
196 break; 189 break;
197 /* These are considered fatal events */ 190 /* These are considered fatal events */
198 case IB_EVENT_PATH_MIG_ERR: 191 case IB_EVENT_PATH_MIG_ERR:
@@ -201,9 +194,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
201 case IB_EVENT_QP_ACCESS_ERR: 194 case IB_EVENT_QP_ACCESS_ERR:
202 case IB_EVENT_DEVICE_FATAL: 195 case IB_EVENT_DEVICE_FATAL:
203 default: 196 default:
204 dprintk("svcrdma: QP ERROR event %d received for QP=%p, " 197 dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, "
205 "closing transport\n", 198 "closing transport\n",
206 event->event, event->element.qp); 199 ib_event_msg(event->event), event->event,
200 event->element.qp);
207 set_bit(XPT_CLOSE, &xprt->xpt_flags); 201 set_bit(XPT_CLOSE, &xprt->xpt_flags);
208 break; 202 break;
209 } 203 }
@@ -402,7 +396,8 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
402 for (i = 0; i < ret; i++) { 396 for (i = 0; i < ret; i++) {
403 wc = &wc_a[i]; 397 wc = &wc_a[i];
404 if (wc->status != IB_WC_SUCCESS) { 398 if (wc->status != IB_WC_SUCCESS) {
405 dprintk("svcrdma: sq wc err status %d\n", 399 dprintk("svcrdma: sq wc err status %s (%d)\n",
400 ib_wc_status_msg(wc->status),
406 wc->status); 401 wc->status);
407 402
408 /* Close the transport */ 403 /* Close the transport */
@@ -490,18 +485,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
490 return cma_xprt; 485 return cma_xprt;
491} 486}
492 487
493struct page *svc_rdma_get_page(void)
494{
495 struct page *page;
496
497 while ((page = alloc_page(GFP_KERNEL)) == NULL) {
498 /* If we can't get memory, wait a bit and try again */
499 printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
500 schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
501 }
502 return page;
503}
504
505int svc_rdma_post_recv(struct svcxprt_rdma *xprt) 488int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
506{ 489{
507 struct ib_recv_wr recv_wr, *bad_recv_wr; 490 struct ib_recv_wr recv_wr, *bad_recv_wr;
@@ -520,7 +503,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
520 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 503 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
521 goto err_put_ctxt; 504 goto err_put_ctxt;
522 } 505 }
523 page = svc_rdma_get_page(); 506 page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
524 ctxt->pages[sge_no] = page; 507 ctxt->pages[sge_no] = page;
525 pa = ib_dma_map_page(xprt->sc_cm_id->device, 508 pa = ib_dma_map_page(xprt->sc_cm_id->device,
526 page, 0, PAGE_SIZE, 509 page, 0, PAGE_SIZE,
@@ -616,7 +599,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
616 switch (event->event) { 599 switch (event->event) {
617 case RDMA_CM_EVENT_CONNECT_REQUEST: 600 case RDMA_CM_EVENT_CONNECT_REQUEST:
618 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 601 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
619 "event=%d\n", cma_id, cma_id->context, event->event); 602 "event = %s (%d)\n", cma_id, cma_id->context,
603 rdma_event_msg(event->event), event->event);
620 handle_connect_req(cma_id, 604 handle_connect_req(cma_id,
621 event->param.conn.initiator_depth); 605 event->param.conn.initiator_depth);
622 break; 606 break;
@@ -636,7 +620,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
636 620
637 default: 621 default:
638 dprintk("svcrdma: Unexpected event on listening endpoint %p, " 622 dprintk("svcrdma: Unexpected event on listening endpoint %p, "
639 "event=%d\n", cma_id, event->event); 623 "event = %s (%d)\n", cma_id,
624 rdma_event_msg(event->event), event->event);
640 break; 625 break;
641 } 626 }
642 627
@@ -669,7 +654,8 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
669 break; 654 break;
670 case RDMA_CM_EVENT_DEVICE_REMOVAL: 655 case RDMA_CM_EVENT_DEVICE_REMOVAL:
671 dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " 656 dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
672 "event=%d\n", cma_id, xprt, event->event); 657 "event = %s (%d)\n", cma_id, xprt,
658 rdma_event_msg(event->event), event->event);
673 if (xprt) { 659 if (xprt) {
674 set_bit(XPT_CLOSE, &xprt->xpt_flags); 660 set_bit(XPT_CLOSE, &xprt->xpt_flags);
675 svc_xprt_enqueue(xprt); 661 svc_xprt_enqueue(xprt);
@@ -677,7 +663,8 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
677 break; 663 break;
678 default: 664 default:
679 dprintk("svcrdma: Unexpected event on DTO endpoint %p, " 665 dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
680 "event=%d\n", cma_id, event->event); 666 "event = %s (%d)\n", cma_id,
667 rdma_event_msg(event->event), event->event);
681 break; 668 break;
682 } 669 }
683 return 0; 670 return 0;
@@ -848,10 +835,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
848 struct svcxprt_rdma *listen_rdma; 835 struct svcxprt_rdma *listen_rdma;
849 struct svcxprt_rdma *newxprt = NULL; 836 struct svcxprt_rdma *newxprt = NULL;
850 struct rdma_conn_param conn_param; 837 struct rdma_conn_param conn_param;
838 struct ib_cq_init_attr cq_attr = {};
851 struct ib_qp_init_attr qp_attr; 839 struct ib_qp_init_attr qp_attr;
852 struct ib_device_attr devattr; 840 struct ib_device_attr devattr;
853 int uninitialized_var(dma_mr_acc); 841 int uninitialized_var(dma_mr_acc);
854 int need_dma_mr; 842 int need_dma_mr = 0;
855 int ret; 843 int ret;
856 int i; 844 int i;
857 845
@@ -900,22 +888,22 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
900 dprintk("svcrdma: error creating PD for connect request\n"); 888 dprintk("svcrdma: error creating PD for connect request\n");
901 goto errout; 889 goto errout;
902 } 890 }
891 cq_attr.cqe = newxprt->sc_sq_depth;
903 newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, 892 newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
904 sq_comp_handler, 893 sq_comp_handler,
905 cq_event_handler, 894 cq_event_handler,
906 newxprt, 895 newxprt,
907 newxprt->sc_sq_depth, 896 &cq_attr);
908 0);
909 if (IS_ERR(newxprt->sc_sq_cq)) { 897 if (IS_ERR(newxprt->sc_sq_cq)) {
910 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 898 dprintk("svcrdma: error creating SQ CQ for connect request\n");
911 goto errout; 899 goto errout;
912 } 900 }
901 cq_attr.cqe = newxprt->sc_max_requests;
913 newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, 902 newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
914 rq_comp_handler, 903 rq_comp_handler,
915 cq_event_handler, 904 cq_event_handler,
916 newxprt, 905 newxprt,
917 newxprt->sc_max_requests, 906 &cq_attr);
918 0);
919 if (IS_ERR(newxprt->sc_rq_cq)) { 907 if (IS_ERR(newxprt->sc_rq_cq)) {
920 dprintk("svcrdma: error creating RQ CQ for connect request\n"); 908 dprintk("svcrdma: error creating RQ CQ for connect request\n");
921 goto errout; 909 goto errout;
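
The CQ-creation hunks track an ib_create_cq() API change: the scalar cqe argument moves into a zero-initialized ib_cq_init_attr that is filled in and reused per queue. A sketch of that attribute-struct pattern with illustrative types, not the real verbs definitions:

#include <stdio.h>

/* A scalar argument becomes an attribute struct, so new fields can
 * be added later without touching every caller. */
struct cq_init_attr {
        unsigned int cqe;          /* queue depth */
        unsigned int comp_vector;
        unsigned int flags;        /* room to grow */
};

static int create_cq(const struct cq_init_attr *attr)
{
        printf("creating CQ with %u entries\n", attr->cqe);
        return 0;
}

int main(void)
{
        struct cq_init_attr cq_attr = { 0 };  /* mirrors "= {}" above */

        cq_attr.cqe = 128;   /* e.g. sc_sq_depth */
        create_cq(&cq_attr);

        cq_attr.cqe = 32;    /* reused for the RQ CQ, as in the patch */
        create_cq(&cq_attr);
        return 0;
}
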
@@ -985,35 +973,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
985 /* 973 /*
986 * Determine if a DMA MR is required and if so, what privs are required 974 * Determine if a DMA MR is required and if so, what privs are required
987 */ 975 */
988 switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { 976 if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
989 case RDMA_TRANSPORT_IWARP: 977 newxprt->sc_cm_id->port_num) &&
990 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; 978 !rdma_ib_or_roce(newxprt->sc_cm_id->device,
991 if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { 979 newxprt->sc_cm_id->port_num))
992 need_dma_mr = 1;
993 dma_mr_acc =
994 (IB_ACCESS_LOCAL_WRITE |
995 IB_ACCESS_REMOTE_WRITE);
996 } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
997 need_dma_mr = 1;
998 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
999 } else
1000 need_dma_mr = 0;
1001 break;
1002 case RDMA_TRANSPORT_IB:
1003 if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
1004 need_dma_mr = 1;
1005 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
1006 } else if (!(devattr.device_cap_flags &
1007 IB_DEVICE_LOCAL_DMA_LKEY)) {
1008 need_dma_mr = 1;
1009 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
1010 } else
1011 need_dma_mr = 0;
1012 break;
1013 default:
1014 goto errout; 980 goto errout;
981
982 if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
983 !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
984 need_dma_mr = 1;
985 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
986 if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
987 newxprt->sc_cm_id->port_num) &&
988 !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
989 dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
1015 } 990 }
1016 991
992 if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
993 newxprt->sc_cm_id->port_num))
994 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
995
1017 /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ 996 /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
1018 if (need_dma_mr) { 997 if (need_dma_mr) {
1019 /* Register all of physical memory */ 998 /* Register all of physical memory */
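
The rewritten block above collapses the per-transport switch into predicate helpers: any device lacking fast registration or a local DMA lkey gets a DMA MR with local write access, and iWARP without fast registration additionally needs remote write. A compact model of that flag logic, with stand-ins for rdma_protocol_iwarp() and the IB_ACCESS_* flags:

#include <stdbool.h>
#include <stdio.h>

#define ACC_LOCAL_WRITE   0x1
#define ACC_REMOTE_WRITE  0x2

static int mr_access(bool is_iwarp, bool have_fastreg, bool have_local_lkey)
{
        int acc = 0;

        if (!have_fastreg || !have_local_lkey) {
                acc = ACC_LOCAL_WRITE;
                /* iWARP without fast registration also needs remote
                 * write rights on the DMA MR, as in the patch. */
                if (is_iwarp && !have_fastreg)
                        acc |= ACC_REMOTE_WRITE;
        }
        return acc;
}

int main(void)
{
        printf("iwarp, no fastreg: 0x%x\n", mr_access(true, false, true));
        printf("ib, fastreg+lkey:  0x%x\n", mr_access(false, true, true));
        return 0;
}
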
@@ -1319,11 +1298,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 struct ib_send_wr err_wr; 1298 struct ib_send_wr err_wr;
1320 struct page *p; 1299 struct page *p;
1321 struct svc_rdma_op_ctxt *ctxt; 1300 struct svc_rdma_op_ctxt *ctxt;
1322 u32 *va; 1301 __be32 *va;
1323 int length; 1302 int length;
1324 int ret; 1303 int ret;
1325 1304
1326 p = svc_rdma_get_page(); 1305 p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
1327 va = page_address(p); 1306 va = page_address(p);
1328 1307
1329 /* XDR encode error */ 1308 /* XDR encode error */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 54f23b1be986..680f888a9ddd 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -48,7 +48,6 @@
48 */ 48 */
49 49
50#include <linux/module.h> 50#include <linux/module.h>
51#include <linux/init.h>
52#include <linux/slab.h> 51#include <linux/slab.h>
53#include <linux/seq_file.h> 52#include <linux/seq_file.h>
54#include <linux/sunrpc/addr.h> 53#include <linux/sunrpc/addr.h>
@@ -59,11 +58,6 @@
59# define RPCDBG_FACILITY RPCDBG_TRANS 58# define RPCDBG_FACILITY RPCDBG_TRANS
60#endif 59#endif
61 60
62MODULE_LICENSE("Dual BSD/GPL");
63
64MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
65MODULE_AUTHOR("Network Appliance, Inc.");
66
67/* 61/*
68 * tunables 62 * tunables
69 */ 63 */
@@ -246,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work)
246 xprt_clear_connecting(xprt); 240 xprt_clear_connecting(xprt);
247} 241}
248 242
243static void
244xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
245{
246 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
247 rx_xprt);
248
249 pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
250 rdma_disconnect(r_xprt->rx_ia.ri_id);
251}
252
249/* 253/*
250 * xprt_rdma_destroy 254 * xprt_rdma_destroy
251 * 255 *
@@ -618,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task)
618 if (req->rl_reply == NULL) /* e.g. reconnection */ 622 if (req->rl_reply == NULL) /* e.g. reconnection */
619 rpcrdma_recv_buffer_get(req); 623 rpcrdma_recv_buffer_get(req);
620 624
621 if (req->rl_reply) {
622 req->rl_reply->rr_func = rpcrdma_reply_handler;
623 /* this need only be done once, but... */
624 req->rl_reply->rr_xprt = xprt;
625 }
626
627 /* Must suppress retransmit to maintain credits */ 625 /* Must suppress retransmit to maintain credits */
628 if (req->rl_connect_cookie == xprt->connect_cookie) 626 if (req->rl_connect_cookie == xprt->connect_cookie)
629 goto drop_connection; 627 goto drop_connection;
@@ -682,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
682 r_xprt->rx_stats.bad_reply_count); 680 r_xprt->rx_stats.bad_reply_count);
683} 681}
684 682
683static int
684xprt_rdma_enable_swap(struct rpc_xprt *xprt)
685{
686 return -EINVAL;
687}
688
689static void
690xprt_rdma_disable_swap(struct rpc_xprt *xprt)
691{
692}
693
685/* 694/*
686 * Plumbing for rpc transport switch and kernel module 695 * Plumbing for rpc transport switch and kernel module
687 */ 696 */
@@ -700,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
700 .send_request = xprt_rdma_send_request, 709 .send_request = xprt_rdma_send_request,
701 .close = xprt_rdma_close, 710 .close = xprt_rdma_close,
702 .destroy = xprt_rdma_destroy, 711 .destroy = xprt_rdma_destroy,
703 .print_stats = xprt_rdma_print_stats 712 .print_stats = xprt_rdma_print_stats,
713 .enable_swap = xprt_rdma_enable_swap,
714 .disable_swap = xprt_rdma_disable_swap,
715 .inject_disconnect = xprt_rdma_inject_disconnect
704}; 716};
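
The ops-table hunk wires in three optional hooks: swap enable/disable (refused with -EINVAL, since swap-over-RDMA isn't supported) and a fault-injection disconnect. A sketch of the optional-hook pattern the generic layer relies on; struct and names are illustrative, not the real struct rpc_xprt_ops:

#include <stdio.h>

struct xprt_ops {
        int  (*enable_swap)(void *xprt);
        void (*disable_swap)(void *xprt);
        void (*inject_disconnect)(void *xprt);
};

static int rdma_enable_swap(void *xprt)
{
        (void)xprt;
        return -1;  /* RDMA can't support swap yet, like -EINVAL above */
}

static void rdma_inject_disconnect(void *xprt)
{
        printf("injecting transport disconnect on xprt=%p\n", xprt);
}

static const struct xprt_ops rdma_ops = {
        .enable_swap       = rdma_enable_swap,
        .inject_disconnect = rdma_inject_disconnect,
};

int main(void)
{
        int dummy;

        /* The generic layer only calls hooks a transport provides. */
        if (rdma_ops.enable_swap && rdma_ops.enable_swap(&dummy) < 0)
                printf("swap-over-RDMA refused\n");
        if (rdma_ops.inject_disconnect)
                rdma_ops.inject_disconnect(&dummy);
        return 0;
}
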
705 717
706static struct xprt_class xprt_rdma = { 718static struct xprt_class xprt_rdma = {
@@ -711,7 +723,7 @@ static struct xprt_class xprt_rdma = {
711 .setup = xprt_setup_rdma, 723 .setup = xprt_setup_rdma,
712}; 724};
713 725
714static void __exit xprt_rdma_cleanup(void) 726void xprt_rdma_cleanup(void)
715{ 727{
716 int rc; 728 int rc;
717 729
@@ -726,17 +738,24 @@ static void __exit xprt_rdma_cleanup(void)
726 if (rc) 738 if (rc)
727 dprintk("RPC: %s: xprt_unregister returned %i\n", 739 dprintk("RPC: %s: xprt_unregister returned %i\n",
728 __func__, rc); 740 __func__, rc);
741
742 frwr_destroy_recovery_wq();
729} 743}
730 744
731static int __init xprt_rdma_init(void) 745int xprt_rdma_init(void)
732{ 746{
733 int rc; 747 int rc;
734 748
735 rc = xprt_register_transport(&xprt_rdma); 749 rc = frwr_alloc_recovery_wq();
736
737 if (rc) 750 if (rc)
738 return rc; 751 return rc;
739 752
753 rc = xprt_register_transport(&xprt_rdma);
754 if (rc) {
755 frwr_destroy_recovery_wq();
756 return rc;
757 }
758
740 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); 759 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
741 760
742 dprintk("Defaults:\n"); 761 dprintk("Defaults:\n");
@@ -753,6 +772,3 @@ static int __init xprt_rdma_init(void)
753#endif 772#endif
754 return 0; 773 return 0;
755} 774}
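
The reworked init path above allocates the FRWR recovery workqueue before registering the transport and tears it down again if registration fails, so a failed load leaves nothing behind. A standalone model of that ordering, with trivial stand-ins for frwr_alloc_recovery_wq() and xprt_register_transport():

#include <stdio.h>

static int alloc_recovery_wq(void)    { return 0; }
static void destroy_recovery_wq(void) { printf("wq destroyed\n"); }
static int register_transport(void)   { return -1; /* simulate failure */ }

static int module_init_model(void)
{
        int rc;

        rc = alloc_recovery_wq();
        if (rc)
                return rc;

        rc = register_transport();
        if (rc) {
                /* Registration failed: release the workqueue so the
                 * module leaves nothing behind. */
                destroy_recovery_wq();
                return rc;
        }
        return 0;
}

int main(void)
{
        printf("init: %d\n", module_init_model());
        return 0;
}
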
756
757module_init(xprt_rdma_init);
758module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4870d272e006..891c4ede2c20 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -80,7 +80,6 @@ static void
80rpcrdma_run_tasklet(unsigned long data) 80rpcrdma_run_tasklet(unsigned long data)
81{ 81{
82 struct rpcrdma_rep *rep; 82 struct rpcrdma_rep *rep;
83 void (*func)(struct rpcrdma_rep *);
84 unsigned long flags; 83 unsigned long flags;
85 84
86 data = data; 85 data = data;
@@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data)
89 rep = list_entry(rpcrdma_tasklets_g.next, 88 rep = list_entry(rpcrdma_tasklets_g.next,
90 struct rpcrdma_rep, rr_list); 89 struct rpcrdma_rep, rr_list);
91 list_del(&rep->rr_list); 90 list_del(&rep->rr_list);
92 func = rep->rr_func;
93 rep->rr_func = NULL;
94 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); 91 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
95 92
96 if (func) 93 rpcrdma_reply_handler(rep);
97 func(rep);
98 else
99 rpcrdma_recv_buffer_put(rep);
100 94
101 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); 95 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
102 } 96 }
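
With rr_func gone, the tasklet above drains its list and calls rpcrdma_reply_handler() unconditionally instead of dispatching through a per-reply function pointer. A minimal model of the simplified drain loop; the list is reduced to an array here:

#include <stdio.h>

struct rep { int id; };

static void reply_handler(struct rep *rep)
{
        printf("handling reply %d\n", rep->id);
}

int main(void)
{
        struct rep reps[] = { { 1 }, { 2 }, { 3 } };
        unsigned int i;

        /* Before: if (rep->rr_func) rep->rr_func(rep); else put(rep);
         * After: one unconditional call per queued reply. */
        for (i = 0; i < sizeof(reps) / sizeof(reps[0]); i++)
                reply_handler(&reps[i]);
        return 0;
}
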
@@ -105,32 +99,6 @@ rpcrdma_run_tasklet(unsigned long data)
105 99
106static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); 100static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
107 101
108static const char * const async_event[] = {
109 "CQ error",
110 "QP fatal error",
111 "QP request error",
112 "QP access error",
113 "communication established",
114 "send queue drained",
115 "path migration successful",
116 "path mig error",
117 "device fatal error",
118 "port active",
119 "port error",
120 "LID change",
121 "P_key change",
122 "SM change",
123 "SRQ error",
124 "SRQ limit reached",
125 "last WQE reached",
126 "client reregister",
127 "GID change",
128};
129
130#define ASYNC_MSG(status) \
131 ((status) < ARRAY_SIZE(async_event) ? \
132 async_event[(status)] : "unknown async error")
133
134static void 102static void
135rpcrdma_schedule_tasklet(struct list_head *sched_list) 103rpcrdma_schedule_tasklet(struct list_head *sched_list)
136{ 104{
@@ -148,7 +116,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
148 struct rpcrdma_ep *ep = context; 116 struct rpcrdma_ep *ep = context;
149 117
150 pr_err("RPC: %s: %s on device %s ep %p\n", 118 pr_err("RPC: %s: %s on device %s ep %p\n",
151 __func__, ASYNC_MSG(event->event), 119 __func__, ib_event_msg(event->event),
152 event->device->name, context); 120 event->device->name, context);
153 if (ep->rep_connected == 1) { 121 if (ep->rep_connected == 1) {
154 ep->rep_connected = -EIO; 122 ep->rep_connected = -EIO;
@@ -163,7 +131,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
163 struct rpcrdma_ep *ep = context; 131 struct rpcrdma_ep *ep = context;
164 132
165 pr_err("RPC: %s: %s on device %s ep %p\n", 133 pr_err("RPC: %s: %s on device %s ep %p\n",
166 __func__, ASYNC_MSG(event->event), 134 __func__, ib_event_msg(event->event),
167 event->device->name, context); 135 event->device->name, context);
168 if (ep->rep_connected == 1) { 136 if (ep->rep_connected == 1) {
169 ep->rep_connected = -EIO; 137 ep->rep_connected = -EIO;
@@ -172,35 +140,6 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
172 } 140 }
173} 141}
174 142
175static const char * const wc_status[] = {
176 "success",
177 "local length error",
178 "local QP operation error",
179 "local EE context operation error",
180 "local protection error",
181 "WR flushed",
182 "memory management operation error",
183 "bad response error",
184 "local access error",
185 "remote invalid request error",
186 "remote access error",
187 "remote operation error",
188 "transport retry counter exceeded",
189 "RNR retry counter exceeded",
190 "local RDD violation error",
191 "remove invalid RD request",
192 "operation aborted",
193 "invalid EE context number",
194 "invalid EE context state",
195 "fatal error",
196 "response timeout error",
197 "general error",
198};
199
200#define COMPLETION_MSG(status) \
201 ((status) < ARRAY_SIZE(wc_status) ? \
202 wc_status[(status)] : "unexpected completion error")
203
204static void 143static void
205rpcrdma_sendcq_process_wc(struct ib_wc *wc) 144rpcrdma_sendcq_process_wc(struct ib_wc *wc)
206{ 145{
@@ -209,7 +148,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
209 if (wc->status != IB_WC_SUCCESS && 148 if (wc->status != IB_WC_SUCCESS &&
210 wc->status != IB_WC_WR_FLUSH_ERR) 149 wc->status != IB_WC_WR_FLUSH_ERR)
211 pr_err("RPC: %s: SEND: %s\n", 150 pr_err("RPC: %s: SEND: %s\n",
212 __func__, COMPLETION_MSG(wc->status)); 151 __func__, ib_wc_status_msg(wc->status));
213 } else { 152 } else {
214 struct rpcrdma_mw *r; 153 struct rpcrdma_mw *r;
215 154
@@ -291,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
291 __func__, rep, wc->byte_len); 230 __func__, rep, wc->byte_len);
292 231
293 rep->rr_len = wc->byte_len; 232 rep->rr_len = wc->byte_len;
294 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, 233 ib_dma_sync_single_for_cpu(rep->rr_device,
295 rdmab_addr(rep->rr_rdmabuf), 234 rdmab_addr(rep->rr_rdmabuf),
296 rep->rr_len, DMA_FROM_DEVICE); 235 rep->rr_len, DMA_FROM_DEVICE);
297 prefetch(rdmab_to_msg(rep->rr_rdmabuf)); 236 prefetch(rdmab_to_msg(rep->rr_rdmabuf));
@@ -302,7 +241,7 @@ out_schedule:
302out_fail: 241out_fail:
303 if (wc->status != IB_WC_WR_FLUSH_ERR) 242 if (wc->status != IB_WC_WR_FLUSH_ERR)
304 pr_err("RPC: %s: rep %p: %s\n", 243 pr_err("RPC: %s: rep %p: %s\n",
305 __func__, rep, COMPLETION_MSG(wc->status)); 244 __func__, rep, ib_wc_status_msg(wc->status));
306 rep->rr_len = ~0U; 245 rep->rr_len = ~0U;
307 goto out_schedule; 246 goto out_schedule;
308} 247}
@@ -386,31 +325,6 @@ rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
386 rpcrdma_sendcq_process_wc(&wc); 325 rpcrdma_sendcq_process_wc(&wc);
387} 326}
388 327
389#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
390static const char * const conn[] = {
391 "address resolved",
392 "address error",
393 "route resolved",
394 "route error",
395 "connect request",
396 "connect response",
397 "connect error",
398 "unreachable",
399 "rejected",
400 "established",
401 "disconnected",
402 "device removal",
403 "multicast join",
404 "multicast error",
405 "address change",
406 "timewait exit",
407};
408
409#define CONNECTION_MSG(status) \
410 ((status) < ARRAY_SIZE(conn) ? \
411 conn[(status)] : "unrecognized connection error")
412#endif
413
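
The deletions above (and the async_event table earlier) retire three private string tables in favor of the core's ib_event_msg(), ib_wc_status_msg() and rdma_event_msg(). The pattern being centralized is just a bounds-checked table lookup with a fallback, as this sketch shows:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char * const wc_status[] = {
        "success",
        "local length error",
        "local QP operation error",
};

#define COMPLETION_MSG(status) \
        ((unsigned)(status) < ARRAY_SIZE(wc_status) ? \
                wc_status[(status)] : "unexpected completion error")

int main(void)
{
        printf("%s\n", COMPLETION_MSG(1));   /* in range */
        printf("%s\n", COMPLETION_MSG(99));  /* falls back */
        return 0;
}
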
414static int 328static int
415rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) 329rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
416{ 330{
@@ -476,7 +390,7 @@ connected:
476 default: 390 default:
477 dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", 391 dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
478 __func__, sap, rpc_get_port(sap), ep, 392 __func__, sap, rpc_get_port(sap), ep,
479 CONNECTION_MSG(event->event)); 393 rdma_event_msg(event->event));
480 break; 394 break;
481 } 395 }
482 396
@@ -487,7 +401,7 @@ connected:
487 401
488 pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", 402 pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
489 sap, rpc_get_port(sap), 403 sap, rpc_get_port(sap),
490 ia->ri_id->device->name, 404 ia->ri_device->name,
491 ia->ri_ops->ro_displayname, 405 ia->ri_ops->ro_displayname,
492 xprt->rx_buf.rb_max_requests, 406 xprt->rx_buf.rb_max_requests,
493 ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); 407 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
@@ -588,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
588 rc = PTR_ERR(ia->ri_id); 502 rc = PTR_ERR(ia->ri_id);
589 goto out1; 503 goto out1;
590 } 504 }
505 ia->ri_device = ia->ri_id->device;
591 506
592 ia->ri_pd = ib_alloc_pd(ia->ri_id->device); 507 ia->ri_pd = ib_alloc_pd(ia->ri_device);
593 if (IS_ERR(ia->ri_pd)) { 508 if (IS_ERR(ia->ri_pd)) {
594 rc = PTR_ERR(ia->ri_pd); 509 rc = PTR_ERR(ia->ri_pd);
595 dprintk("RPC: %s: ib_alloc_pd() failed %i\n", 510 dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
@@ -597,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
597 goto out2; 512 goto out2;
598 } 513 }
599 514
600 rc = ib_query_device(ia->ri_id->device, devattr); 515 rc = ib_query_device(ia->ri_device, devattr);
601 if (rc) { 516 if (rc) {
602 dprintk("RPC: %s: ib_query_device failed %d\n", 517 dprintk("RPC: %s: ib_query_device failed %d\n",
603 __func__, rc); 518 __func__, rc);
@@ -606,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
606 521
607 if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { 522 if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
608 ia->ri_have_dma_lkey = 1; 523 ia->ri_have_dma_lkey = 1;
609 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; 524 ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
610 } 525 }
611 526
612 if (memreg == RPCRDMA_FRMR) { 527 if (memreg == RPCRDMA_FRMR) {
@@ -621,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
621 } 536 }
622 } 537 }
623 if (memreg == RPCRDMA_MTHCAFMR) { 538 if (memreg == RPCRDMA_MTHCAFMR) {
624 if (!ia->ri_id->device->alloc_fmr) { 539 if (!ia->ri_device->alloc_fmr) {
625 dprintk("RPC: %s: MTHCAFMR registration " 540 dprintk("RPC: %s: MTHCAFMR registration "
626 "not supported by HCA\n", __func__); 541 "not supported by HCA\n", __func__);
627 memreg = RPCRDMA_ALLPHYSICAL; 542 memreg = RPCRDMA_ALLPHYSICAL;
@@ -670,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
670 dprintk("RPC: %s: memory registration strategy is '%s'\n", 585 dprintk("RPC: %s: memory registration strategy is '%s'\n",
671 __func__, ia->ri_ops->ro_displayname); 586 __func__, ia->ri_ops->ro_displayname);
672 587
673 /* Else will do memory reg/dereg for each chunk */
674 ia->ri_memreg_strategy = memreg;
675
676 rwlock_init(&ia->ri_qplock); 588 rwlock_init(&ia->ri_qplock);
677 return 0; 589 return 0;
678 590
@@ -702,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
702 dprintk("RPC: %s: ib_dereg_mr returned %i\n", 614 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
703 __func__, rc); 615 __func__, rc);
704 } 616 }
617
705 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { 618 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
706 if (ia->ri_id->qp) 619 if (ia->ri_id->qp)
707 rdma_destroy_qp(ia->ri_id); 620 rdma_destroy_qp(ia->ri_id);
708 rdma_destroy_id(ia->ri_id); 621 rdma_destroy_id(ia->ri_id);
709 ia->ri_id = NULL; 622 ia->ri_id = NULL;
710 } 623 }
711 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { 624
712 rc = ib_dealloc_pd(ia->ri_pd); 625 /* If the pd is still busy, xprtrdma missed freeing a resource */
713 dprintk("RPC: %s: ib_dealloc_pd returned %i\n", 626 if (ia->ri_pd && !IS_ERR(ia->ri_pd))
714 __func__, rc); 627 WARN_ON(ib_dealloc_pd(ia->ri_pd));
715 }
716} 628}
717 629
718/* 630/*
@@ -724,6 +636,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
724{ 636{
725 struct ib_device_attr *devattr = &ia->ri_devattr; 637 struct ib_device_attr *devattr = &ia->ri_devattr;
726 struct ib_cq *sendcq, *recvcq; 638 struct ib_cq *sendcq, *recvcq;
639 struct ib_cq_init_attr cq_attr = {};
727 int rc, err; 640 int rc, err;
728 641
729 /* check provider's send/recv wr limits */ 642 /* check provider's send/recv wr limits */
@@ -771,9 +684,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
771 init_waitqueue_head(&ep->rep_connect_wait); 684 init_waitqueue_head(&ep->rep_connect_wait);
772 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 685 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
773 686
774 sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, 687 cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
775 rpcrdma_cq_async_error_upcall, ep, 688 sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
776 ep->rep_attr.cap.max_send_wr + 1, 0); 689 rpcrdma_cq_async_error_upcall, ep, &cq_attr);
777 if (IS_ERR(sendcq)) { 690 if (IS_ERR(sendcq)) {
778 rc = PTR_ERR(sendcq); 691 rc = PTR_ERR(sendcq);
779 dprintk("RPC: %s: failed to create send CQ: %i\n", 692 dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -788,9 +701,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
788 goto out2; 701 goto out2;
789 } 702 }
790 703
791 recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, 704 cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
792 rpcrdma_cq_async_error_upcall, ep, 705 recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
793 ep->rep_attr.cap.max_recv_wr + 1, 0); 706 rpcrdma_cq_async_error_upcall, ep, &cq_attr);
794 if (IS_ERR(recvcq)) { 707 if (IS_ERR(recvcq)) {
795 rc = PTR_ERR(recvcq); 708 rc = PTR_ERR(recvcq);
796 dprintk("RPC: %s: failed to create recv CQ: %i\n", 709 dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -896,8 +809,6 @@ retry:
896 rpcrdma_flush_cqs(ep); 809 rpcrdma_flush_cqs(ep);
897 810
898 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); 811 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
899 ia->ri_ops->ro_reset(xprt);
900
901 id = rpcrdma_create_id(xprt, ia, 812 id = rpcrdma_create_id(xprt, ia,
902 (struct sockaddr *)&xprt->rx_data.addr); 813 (struct sockaddr *)&xprt->rx_data.addr);
903 if (IS_ERR(id)) { 814 if (IS_ERR(id)) {
@@ -911,7 +822,7 @@ retry:
911 * More stuff I haven't thought of! 822 * More stuff I haven't thought of!
912 * Rrrgh! 823 * Rrrgh!
913 */ 824 */
914 if (ia->ri_id->device != id->device) { 825 if (ia->ri_device != id->device) {
915 printk("RPC: %s: can't reconnect on " 826 printk("RPC: %s: can't reconnect on "
916 "different device!\n", __func__); 827 "different device!\n", __func__);
917 rdma_destroy_id(id); 828 rdma_destroy_id(id);
@@ -1053,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1053 goto out_free; 964 goto out_free;
1054 } 965 }
1055 966
1056 rep->rr_buffer = &r_xprt->rx_buf; 967 rep->rr_device = ia->ri_device;
968 rep->rr_rxprt = r_xprt;
1057 return rep; 969 return rep;
1058 970
1059out_free: 971out_free:
@@ -1177,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1177 kfree(buf->rb_pool); 1089 kfree(buf->rb_pool);
1178} 1090}
1179 1091
1180/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving 1092struct rpcrdma_mw *
1181 * some req segments uninitialized. 1093rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
1182 */
1183static void
1184rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1185{ 1094{
1186 if (*mw) { 1095 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1187 list_add_tail(&(*mw)->mw_list, &buf->rb_mws); 1096 struct rpcrdma_mw *mw = NULL;
1188 *mw = NULL; 1097
1098 spin_lock(&buf->rb_mwlock);
1099 if (!list_empty(&buf->rb_mws)) {
1100 mw = list_first_entry(&buf->rb_mws,
1101 struct rpcrdma_mw, mw_list);
1102 list_del_init(&mw->mw_list);
1189 } 1103 }
1104 spin_unlock(&buf->rb_mwlock);
1105
1106 if (!mw)
1107 pr_err("RPC: %s: no MWs available\n", __func__);
1108 return mw;
1190} 1109}
1191 1110
1192/* Cycle mw's back in reverse order, and "spin" them. 1111void
1193 * This delays and scrambles reuse as much as possible. 1112rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
1194 */
1195static void
1196rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1197{ 1113{
1198 struct rpcrdma_mr_seg *seg = req->rl_segments; 1114 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1199 struct rpcrdma_mr_seg *seg1 = seg;
1200 int i;
1201 1115
1202 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) 1116 spin_lock(&buf->rb_mwlock);
1203 rpcrdma_buffer_put_mr(&seg->rl_mw, buf); 1117 list_add_tail(&mw->mw_list, &buf->rb_mws);
1204 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); 1118 spin_unlock(&buf->rb_mwlock);
1205} 1119}
1206 1120
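
The new rpcrdma_get_mw()/rpcrdma_put_mw() pair above manages the MW free list under its own rb_mwlock, decoupled from the buffer-array lock. A userspace model of that locked pool; a mutex and a singly linked stack stand in for the kernel spinlock and list_head (the kernel version pops the head and appends at the tail):

#include <pthread.h>
#include <stdio.h>

struct mw {
        struct mw *next;
        int id;
};

static pthread_mutex_t mw_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mw *mw_free;

static struct mw *get_mw(void)
{
        struct mw *mw;

        pthread_mutex_lock(&mw_lock);
        mw = mw_free;            /* take the first entry, if any */
        if (mw)
                mw_free = mw->next;
        pthread_mutex_unlock(&mw_lock);

        if (!mw)
                fprintf(stderr, "no MWs available\n");
        return mw;
}

static void put_mw(struct mw *mw)
{
        pthread_mutex_lock(&mw_lock);
        mw->next = mw_free;      /* return to the pool */
        mw_free = mw;
        pthread_mutex_unlock(&mw_lock);
}

int main(void)
{
        struct mw a = { NULL, 1 };

        put_mw(&a);
        printf("got mw %d\n", get_mw()->id);
        return 0;
}
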
1207static void 1121static void
@@ -1211,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1211 req->rl_niovs = 0; 1125 req->rl_niovs = 0;
1212 if (req->rl_reply) { 1126 if (req->rl_reply) {
1213 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; 1127 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1214 req->rl_reply->rr_func = NULL;
1215 req->rl_reply = NULL; 1128 req->rl_reply = NULL;
1216 } 1129 }
1217} 1130}
1218 1131
1219/* rpcrdma_unmap_one() was already done during deregistration.
1220 * Redo only the ib_post_send().
1221 */
1222static void
1223rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1224{
1225 struct rpcrdma_xprt *r_xprt =
1226 container_of(ia, struct rpcrdma_xprt, rx_ia);
1227 struct ib_send_wr invalidate_wr, *bad_wr;
1228 int rc;
1229
1230 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1231
1232 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
1233 r->r.frmr.fr_state = FRMR_IS_INVALID;
1234
1235 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1236 invalidate_wr.wr_id = (unsigned long)(void *)r;
1237 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1238 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1239 DECR_CQCOUNT(&r_xprt->rx_ep);
1240
1241 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1242 __func__, r, r->r.frmr.fr_mr->rkey);
1243
1244 read_lock(&ia->ri_qplock);
1245 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1246 read_unlock(&ia->ri_qplock);
1247 if (rc) {
1248 /* Force rpcrdma_buffer_get() to retry */
1249 r->r.frmr.fr_state = FRMR_IS_STALE;
1250 dprintk("RPC: %s: ib_post_send failed, %i\n",
1251 __func__, rc);
1252 }
1253}
1254
1255static void
1256rpcrdma_retry_flushed_linv(struct list_head *stale,
1257 struct rpcrdma_buffer *buf)
1258{
1259 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1260 struct list_head *pos;
1261 struct rpcrdma_mw *r;
1262 unsigned long flags;
1263
1264 list_for_each(pos, stale) {
1265 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1266 rpcrdma_retry_local_inv(r, ia);
1267 }
1268
1269 spin_lock_irqsave(&buf->rb_lock, flags);
1270 list_splice_tail(stale, &buf->rb_mws);
1271 spin_unlock_irqrestore(&buf->rb_lock, flags);
1272}
1273
1274static struct rpcrdma_req *
1275rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1276 struct list_head *stale)
1277{
1278 struct rpcrdma_mw *r;
1279 int i;
1280
1281 i = RPCRDMA_MAX_SEGS - 1;
1282 while (!list_empty(&buf->rb_mws)) {
1283 r = list_entry(buf->rb_mws.next,
1284 struct rpcrdma_mw, mw_list);
1285 list_del(&r->mw_list);
1286 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1287 list_add(&r->mw_list, stale);
1288 continue;
1289 }
1290 req->rl_segments[i].rl_mw = r;
1291 if (unlikely(i-- == 0))
1292 return req; /* Success */
1293 }
1294
1295 /* Not enough entries on rb_mws for this req */
1296 rpcrdma_buffer_put_sendbuf(req, buf);
1297 rpcrdma_buffer_put_mrs(req, buf);
1298 return NULL;
1299}
1300
1301static struct rpcrdma_req *
1302rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1303{
1304 struct rpcrdma_mw *r;
1305 int i;
1306
1307 i = RPCRDMA_MAX_SEGS - 1;
1308 while (!list_empty(&buf->rb_mws)) {
1309 r = list_entry(buf->rb_mws.next,
1310 struct rpcrdma_mw, mw_list);
1311 list_del(&r->mw_list);
1312 req->rl_segments[i].rl_mw = r;
1313 if (unlikely(i-- == 0))
1314 return req; /* Success */
1315 }
1316
1317 /* Not enough entries on rb_mws for this req */
1318 rpcrdma_buffer_put_sendbuf(req, buf);
1319 rpcrdma_buffer_put_mrs(req, buf);
1320 return NULL;
1321}
1322
1323/* 1132/*
1324 * Get a set of request/reply buffers. 1133 * Get a set of request/reply buffers.
1325 * 1134 *
@@ -1332,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1332struct rpcrdma_req * 1141struct rpcrdma_req *
1333rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1142rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1334{ 1143{
1335 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1336 struct list_head stale;
1337 struct rpcrdma_req *req; 1144 struct rpcrdma_req *req;
1338 unsigned long flags; 1145 unsigned long flags;
1339 1146
1340 spin_lock_irqsave(&buffers->rb_lock, flags); 1147 spin_lock_irqsave(&buffers->rb_lock, flags);
1148
1341 if (buffers->rb_send_index == buffers->rb_max_requests) { 1149 if (buffers->rb_send_index == buffers->rb_max_requests) {
1342 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1150 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1343 dprintk("RPC: %s: out of request buffers\n", __func__); 1151 dprintk("RPC: %s: out of request buffers\n", __func__);
@@ -1356,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1356 } 1164 }
1357 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; 1165 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1358 1166
1359 INIT_LIST_HEAD(&stale);
1360 switch (ia->ri_memreg_strategy) {
1361 case RPCRDMA_FRMR:
1362 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1363 break;
1364 case RPCRDMA_MTHCAFMR:
1365 req = rpcrdma_buffer_get_fmrs(req, buffers);
1366 break;
1367 default:
1368 break;
1369 }
1370 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1167 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1371 if (!list_empty(&stale))
1372 rpcrdma_retry_flushed_linv(&stale, buffers);
1373 return req; 1168 return req;
1374} 1169}
1375 1170
@@ -1381,19 +1176,10 @@ void
1381rpcrdma_buffer_put(struct rpcrdma_req *req) 1176rpcrdma_buffer_put(struct rpcrdma_req *req)
1382{ 1177{
1383 struct rpcrdma_buffer *buffers = req->rl_buffer; 1178 struct rpcrdma_buffer *buffers = req->rl_buffer;
1384 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1385 unsigned long flags; 1179 unsigned long flags;
1386 1180
1387 spin_lock_irqsave(&buffers->rb_lock, flags); 1181 spin_lock_irqsave(&buffers->rb_lock, flags);
1388 rpcrdma_buffer_put_sendbuf(req, buffers); 1182 rpcrdma_buffer_put_sendbuf(req, buffers);
1389 switch (ia->ri_memreg_strategy) {
1390 case RPCRDMA_FRMR:
1391 case RPCRDMA_MTHCAFMR:
1392 rpcrdma_buffer_put_mrs(req, buffers);
1393 break;
1394 default:
1395 break;
1396 }
1397 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1183 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1398} 1184}
1399 1185
@@ -1423,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1423void 1209void
1424rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) 1210rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1425{ 1211{
1426 struct rpcrdma_buffer *buffers = rep->rr_buffer; 1212 struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
1427 unsigned long flags; 1213 unsigned long flags;
1428 1214
1429 rep->rr_func = NULL;
1430 spin_lock_irqsave(&buffers->rb_lock, flags); 1215 spin_lock_irqsave(&buffers->rb_lock, flags);
1431 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; 1216 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1432 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1217 spin_unlock_irqrestore(&buffers->rb_lock, flags);
@@ -1455,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1455 /* 1240 /*
1456 * All memory passed here was kmalloc'ed, therefore phys-contiguous. 1241 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1457 */ 1242 */
1458 iov->addr = ib_dma_map_single(ia->ri_id->device, 1243 iov->addr = ib_dma_map_single(ia->ri_device,
1459 va, len, DMA_BIDIRECTIONAL); 1244 va, len, DMA_BIDIRECTIONAL);
1460 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr)) 1245 if (ib_dma_mapping_error(ia->ri_device, iov->addr))
1461 return -ENOMEM; 1246 return -ENOMEM;
1462 1247
1463 iov->length = len; 1248 iov->length = len;
@@ -1501,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1501{ 1286{
1502 int rc; 1287 int rc;
1503 1288
1504 ib_dma_unmap_single(ia->ri_id->device, 1289 ib_dma_unmap_single(ia->ri_device,
1505 iov->addr, iov->length, DMA_BIDIRECTIONAL); 1290 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1506 1291
1507 if (NULL == mr) 1292 if (NULL == mr)
1508 return 0; 1293 return 0;
@@ -1595,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1595 send_wr.num_sge = req->rl_niovs; 1380 send_wr.num_sge = req->rl_niovs;
1596 send_wr.opcode = IB_WR_SEND; 1381 send_wr.opcode = IB_WR_SEND;
1597 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ 1382 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1598 ib_dma_sync_single_for_device(ia->ri_id->device, 1383 ib_dma_sync_single_for_device(ia->ri_device,
1599 req->rl_send_iov[3].addr, req->rl_send_iov[3].length, 1384 req->rl_send_iov[3].addr,
1600 DMA_TO_DEVICE); 1385 req->rl_send_iov[3].length,
1601 ib_dma_sync_single_for_device(ia->ri_id->device, 1386 DMA_TO_DEVICE);
1602 req->rl_send_iov[1].addr, req->rl_send_iov[1].length, 1387 ib_dma_sync_single_for_device(ia->ri_device,
1603 DMA_TO_DEVICE); 1388 req->rl_send_iov[1].addr,
1604 ib_dma_sync_single_for_device(ia->ri_id->device, 1389 req->rl_send_iov[1].length,
1605 req->rl_send_iov[0].addr, req->rl_send_iov[0].length, 1390 DMA_TO_DEVICE);
1606 DMA_TO_DEVICE); 1391 ib_dma_sync_single_for_device(ia->ri_device,
1392 req->rl_send_iov[0].addr,
1393 req->rl_send_iov[0].length,
1394 DMA_TO_DEVICE);
1607 1395
1608 if (DECR_CQCOUNT(ep) > 0) 1396 if (DECR_CQCOUNT(ep) > 0)
1609 send_wr.send_flags = 0; 1397 send_wr.send_flags = 0;
@@ -1636,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1636 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1424 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1637 recv_wr.num_sge = 1; 1425 recv_wr.num_sge = 1;
1638 1426
1639 ib_dma_sync_single_for_cpu(ia->ri_id->device, 1427 ib_dma_sync_single_for_cpu(ia->ri_device,
1640 rdmab_addr(rep->rr_rdmabuf), 1428 rdmab_addr(rep->rr_rdmabuf),
1641 rdmab_length(rep->rr_rdmabuf), 1429 rdmab_length(rep->rr_rdmabuf),
1642 DMA_BIDIRECTIONAL); 1430 DMA_BIDIRECTIONAL);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 78e0b8beaa36..f49dd8b38122 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -62,6 +62,7 @@
62struct rpcrdma_ia { 62struct rpcrdma_ia {
63 const struct rpcrdma_memreg_ops *ri_ops; 63 const struct rpcrdma_memreg_ops *ri_ops;
64 rwlock_t ri_qplock; 64 rwlock_t ri_qplock;
65 struct ib_device *ri_device;
65 struct rdma_cm_id *ri_id; 66 struct rdma_cm_id *ri_id;
66 struct ib_pd *ri_pd; 67 struct ib_pd *ri_pd;
67 struct ib_mr *ri_bind_mem; 68 struct ib_mr *ri_bind_mem;
@@ -69,7 +70,6 @@ struct rpcrdma_ia {
69 int ri_have_dma_lkey; 70 int ri_have_dma_lkey;
70 struct completion ri_done; 71 struct completion ri_done;
71 int ri_async_rc; 72 int ri_async_rc;
72 enum rpcrdma_memreg ri_memreg_strategy;
73 unsigned int ri_max_frmr_depth; 73 unsigned int ri_max_frmr_depth;
74 struct ib_device_attr ri_devattr; 74 struct ib_device_attr ri_devattr;
75 struct ib_qp_attr ri_qp_attr; 75 struct ib_qp_attr ri_qp_attr;
@@ -173,9 +173,8 @@ struct rpcrdma_buffer;
173 173
174struct rpcrdma_rep { 174struct rpcrdma_rep {
175 unsigned int rr_len; 175 unsigned int rr_len;
176 struct rpcrdma_buffer *rr_buffer; 176 struct ib_device *rr_device;
177 struct rpc_xprt *rr_xprt; 177 struct rpcrdma_xprt *rr_rxprt;
178 void (*rr_func)(struct rpcrdma_rep *);
179 struct list_head rr_list; 178 struct list_head rr_list;
180 struct rpcrdma_regbuf *rr_rdmabuf; 179 struct rpcrdma_regbuf *rr_rdmabuf;
181}; 180};
@@ -203,11 +202,18 @@ struct rpcrdma_frmr {
203 struct ib_fast_reg_page_list *fr_pgl; 202 struct ib_fast_reg_page_list *fr_pgl;
204 struct ib_mr *fr_mr; 203 struct ib_mr *fr_mr;
205 enum rpcrdma_frmr_state fr_state; 204 enum rpcrdma_frmr_state fr_state;
205 struct work_struct fr_work;
206 struct rpcrdma_xprt *fr_xprt;
207};
208
209struct rpcrdma_fmr {
210 struct ib_fmr *fmr;
211 u64 *physaddrs;
206}; 212};
207 213
208struct rpcrdma_mw { 214struct rpcrdma_mw {
209 union { 215 union {
210 struct ib_fmr *fmr; 216 struct rpcrdma_fmr fmr;
211 struct rpcrdma_frmr frmr; 217 struct rpcrdma_frmr frmr;
212 } r; 218 } r;
213 void (*mw_sendcompletion)(struct ib_wc *); 219 void (*mw_sendcompletion)(struct ib_wc *);
@@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
281 * One of these is associated with a transport instance 287 * One of these is associated with a transport instance
282 */ 288 */
283struct rpcrdma_buffer { 289struct rpcrdma_buffer {
284 spinlock_t rb_lock; /* protects indexes */ 290 spinlock_t rb_mwlock; /* protect rb_mws list */
285 u32 rb_max_requests;/* client max requests */ 291 struct list_head rb_mws;
286 struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ 292 struct list_head rb_all;
287 struct list_head rb_all; 293 char *rb_pool;
288 int rb_send_index; 294
295 spinlock_t rb_lock; /* protect buf arrays */
296 u32 rb_max_requests;
297 int rb_send_index;
298 int rb_recv_index;
289 struct rpcrdma_req **rb_send_bufs; 299 struct rpcrdma_req **rb_send_bufs;
290 int rb_recv_index;
291 struct rpcrdma_rep **rb_recv_bufs; 300 struct rpcrdma_rep **rb_recv_bufs;
292 char *rb_pool;
293}; 301};
294#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) 302#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
295 303
@@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops {
350 struct rpcrdma_create_data_internal *); 358 struct rpcrdma_create_data_internal *);
351 size_t (*ro_maxpages)(struct rpcrdma_xprt *); 359 size_t (*ro_maxpages)(struct rpcrdma_xprt *);
352 int (*ro_init)(struct rpcrdma_xprt *); 360 int (*ro_init)(struct rpcrdma_xprt *);
353 void (*ro_reset)(struct rpcrdma_xprt *);
354 void (*ro_destroy)(struct rpcrdma_buffer *); 361 void (*ro_destroy)(struct rpcrdma_buffer *);
355 const char *ro_displayname; 362 const char *ro_displayname;
356}; 363};
@@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
413int rpcrdma_buffer_create(struct rpcrdma_xprt *); 420int rpcrdma_buffer_create(struct rpcrdma_xprt *);
414void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 421void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
415 422
423struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
424void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
416struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 425struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
417void rpcrdma_buffer_put(struct rpcrdma_req *); 426void rpcrdma_buffer_put(struct rpcrdma_req *);
418void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 427void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
@@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
425 434
426unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); 435unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
427 436
437int frwr_alloc_recovery_wq(void);
438void frwr_destroy_recovery_wq(void);
439
428/* 440/*
429 * Wrappers for chunk registration, shared by read/write chunk code. 441 * Wrappers for chunk registration, shared by read/write chunk code.
430 */ 442 */
@@ -480,6 +492,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
480 */ 492 */
481int rpcrdma_marshal_req(struct rpc_rqst *); 493int rpcrdma_marshal_req(struct rpc_rqst *);
482 494
495/* RPC/RDMA module init - xprtrdma/transport.c
496 */
497int xprt_rdma_init(void);
498void xprt_rdma_cleanup(void);
499
483/* Temporary NFS request map cache. Created in svc_rdma.c */ 500/* Temporary NFS request map cache. Created in svc_rdma.c */
484extern struct kmem_cache *svc_rdma_map_cachep; 501extern struct kmem_cache *svc_rdma_map_cachep;
485/* WR context cache. Created in svc_rdma.c */ 502/* WR context cache. Created in svc_rdma.c */
@@ -487,10 +504,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
487/* Workqueue created in svc_rdma.c */ 504/* Workqueue created in svc_rdma.c */
488extern struct workqueue_struct *svc_rdma_wq; 505extern struct workqueue_struct *svc_rdma_wq;
489 506
490#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
491#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
492#else
493#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
494#endif
495
496#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 507#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 66891e32c5e3..e193c2b5476b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -623,24 +623,6 @@ process_status:
623} 623}
624 624
625/** 625/**
626 * xs_tcp_shutdown - gracefully shut down a TCP socket
627 * @xprt: transport
628 *
629 * Initiates a graceful shutdown of the TCP socket by calling the
630 * equivalent of shutdown(SHUT_RDWR);
631 */
632static void xs_tcp_shutdown(struct rpc_xprt *xprt)
633{
634 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
635 struct socket *sock = transport->sock;
636
637 if (sock != NULL) {
638 kernel_sock_shutdown(sock, SHUT_RDWR);
639 trace_rpc_socket_shutdown(xprt, sock);
640 }
641}
642
643/**
644 * xs_tcp_send_request - write an RPC request to a TCP socket 626 * xs_tcp_send_request - write an RPC request to a TCP socket
645 * @task: address of RPC task that manages the state of an RPC request 627 * @task: address of RPC task that manages the state of an RPC request
646 * 628 *
@@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
786 xs_sock_reset_connection_flags(xprt); 768 xs_sock_reset_connection_flags(xprt);
787 /* Mark transport as closed and wake up all pending tasks */ 769 /* Mark transport as closed and wake up all pending tasks */
788 xprt_disconnect_done(xprt); 770 xprt_disconnect_done(xprt);
771 xprt_force_disconnect(xprt);
789} 772}
790 773
791/** 774/**
@@ -827,6 +810,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
827 if (sk == NULL) 810 if (sk == NULL)
828 return; 811 return;
829 812
813 if (atomic_read(&transport->xprt.swapper))
814 sk_clear_memalloc(sk);
815
830 write_lock_bh(&sk->sk_callback_lock); 816 write_lock_bh(&sk->sk_callback_lock);
831 transport->inet = NULL; 817 transport->inet = NULL;
832 transport->sock = NULL; 818 transport->sock = NULL;
@@ -863,6 +849,13 @@ static void xs_close(struct rpc_xprt *xprt)
863 xprt_disconnect_done(xprt); 849 xprt_disconnect_done(xprt);
864} 850}
865 851
852static void xs_inject_disconnect(struct rpc_xprt *xprt)
853{
854 dprintk("RPC: injecting transport disconnect on xprt=%p\n",
855 xprt);
856 xprt_disconnect_done(xprt);
857}
858
866static void xs_xprt_free(struct rpc_xprt *xprt) 859static void xs_xprt_free(struct rpc_xprt *xprt)
867{ 860{
868 xs_free_peer_addresses(xprt); 861 xs_free_peer_addresses(xprt);
@@ -901,7 +894,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
901/** 894/**
902 * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets 895 * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
903 * @sk: socket with data to read 896 * @sk: socket with data to read
904 * @len: how much data to read
905 * 897 *
906 * Currently this assumes we can read the whole reply in a single gulp. 898 * Currently this assumes we can read the whole reply in a single gulp.
907 */ 899 */
@@ -965,7 +957,6 @@ static void xs_local_data_ready(struct sock *sk)
965/** 957/**
966 * xs_udp_data_ready - "data ready" callback for UDP sockets 958 * xs_udp_data_ready - "data ready" callback for UDP sockets
967 * @sk: socket with data to read 959 * @sk: socket with data to read
968 * @len: how much data to read
969 * 960 *
970 */ 961 */
971static void xs_udp_data_ready(struct sock *sk) 962static void xs_udp_data_ready(struct sock *sk)
@@ -1389,7 +1380,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
1389/** 1380/**
1390 * xs_tcp_data_ready - "data ready" callback for TCP sockets 1381 * xs_tcp_data_ready - "data ready" callback for TCP sockets
1391 * @sk: socket with data to read 1382 * @sk: socket with data to read
1392 * @bytes: how much data to read
1393 * 1383 *
1394 */ 1384 */
1395static void xs_tcp_data_ready(struct sock *sk) 1385static void xs_tcp_data_ready(struct sock *sk)
@@ -1886,9 +1876,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
1886 1876
1887/** 1877/**
1888 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint 1878 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
1889 * @xprt: RPC transport to connect
1890 * @transport: socket transport to connect 1879 * @transport: socket transport to connect
1891 * @create_sock: function to create a socket of the correct type
1892 */ 1880 */
1893static int xs_local_setup_socket(struct sock_xprt *transport) 1881static int xs_local_setup_socket(struct sock_xprt *transport)
1894{ 1882{
@@ -1960,43 +1948,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
1960 msleep_interruptible(15000); 1948 msleep_interruptible(15000);
1961} 1949}
1962 1950
1963#ifdef CONFIG_SUNRPC_SWAP 1951#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
1952/*
1953 * Note that this should be called with XPRT_LOCKED held (or when we otherwise
1954 * know that we have exclusive access to the socket), to guard against
1955 * races with xs_reset_transport.
1956 */
1964static void xs_set_memalloc(struct rpc_xprt *xprt) 1957static void xs_set_memalloc(struct rpc_xprt *xprt)
1965{ 1958{
1966 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 1959 struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
1967 xprt); 1960 xprt);
1968 1961
1969 if (xprt->swapper) 1962 /*
1963 * If there's no sock, then we have nothing to set. The
1964 * reconnecting process will get it for us.
1965 */
1966 if (!transport->inet)
1967 return;
1968 if (atomic_read(&xprt->swapper))
1970 sk_set_memalloc(transport->inet); 1969 sk_set_memalloc(transport->inet);
1971} 1970}
1972 1971
1973/** 1972/**
1974 * xs_swapper - Tag this transport as being used for swap. 1973 * xs_enable_swap - Tag this transport as being used for swap.
1975 * @xprt: transport to tag 1974 * @xprt: transport to tag
1976 * @enable: enable/disable
1977 * 1975 *
1976 * Take a reference to this transport on behalf of the rpc_clnt, and
1977 * optionally mark it for swapping if it wasn't already.
1978 */ 1978 */
1979int xs_swapper(struct rpc_xprt *xprt, int enable) 1979static int
1980xs_enable_swap(struct rpc_xprt *xprt)
1980{ 1981{
1981 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 1982 struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
1982 xprt);
1983 int err = 0;
1984 1983
1985 if (enable) { 1984 if (atomic_inc_return(&xprt->swapper) != 1)
1986 xprt->swapper++; 1985 return 0;
1987 xs_set_memalloc(xprt); 1986 if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
1988 } else if (xprt->swapper) { 1987 return -ERESTARTSYS;
1989 xprt->swapper--; 1988 if (xs->inet)
1990 sk_clear_memalloc(transport->inet); 1989 sk_set_memalloc(xs->inet);
1991 } 1990 xprt_release_xprt(xprt, NULL);
1991 return 0;
1992}
1992 1993
1993 return err; 1994/**
1995 * xs_disable_swap - Untag this transport as being used for swap.
1996 * @xprt: transport to tag
1997 *
1998 * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the
1999 * swapper refcount goes to 0, untag the socket as a memalloc socket.
2000 */
2001static void
2002xs_disable_swap(struct rpc_xprt *xprt)
2003{
2004 struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
2005
2006 if (!atomic_dec_and_test(&xprt->swapper))
2007 return;
2008 if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
2009 return;
2010 if (xs->inet)
2011 sk_clear_memalloc(xs->inet);
2012 xprt_release_xprt(xprt, NULL);
1994} 2013}
1995EXPORT_SYMBOL_GPL(xs_swapper);
1996#else 2014#else
1997static void xs_set_memalloc(struct rpc_xprt *xprt) 2015static void xs_set_memalloc(struct rpc_xprt *xprt)
1998{ 2016{
1999} 2017}
2018
2019static int
2020xs_enable_swap(struct rpc_xprt *xprt)
2021{
2022 return -EINVAL;
2023}
2024
2025static void
2026xs_disable_swap(struct rpc_xprt *xprt)
2027{
2028}
2000#endif 2029#endif
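
xs_enable_swap()/xs_disable_swap() above turn xprt->swapper into an atomic refcount: only the 0->1 and 1->0 transitions tag or untag the socket, and in the kernel those transitions run with the transport lock held. A sketch of the refcount semantics with C11 atomics; the locking is elided:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int swapper;
static bool sk_memalloc;

static int enable_swap(void)
{
        if (atomic_fetch_add(&swapper, 1) != 0)
                return 0;        /* not the first user: nothing to do */
        sk_memalloc = true;      /* first user tags the socket */
        return 0;
}

static void disable_swap(void)
{
        if (atomic_fetch_sub(&swapper, 1) != 1)
                return;          /* other users remain */
        sk_memalloc = false;     /* last user untags the socket */
}

int main(void)
{
        enable_swap();
        enable_swap();
        disable_swap();
        printf("memalloc after one disable: %d\n", sk_memalloc);  /* 1 */
        disable_swap();
        printf("memalloc after both:        %d\n", sk_memalloc);  /* 0 */
        return 0;
}
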
2001 2030
2002static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2031static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -2057,6 +2086,27 @@ out:
2057 xprt_wake_pending_tasks(xprt, status); 2086 xprt_wake_pending_tasks(xprt, status);
2058} 2087}
2059 2088
2089/**
2090 * xs_tcp_shutdown - gracefully shut down a TCP socket
2091 * @xprt: transport
2092 *
2093 * Initiates a graceful shutdown of the TCP socket by calling the
2094 * equivalent of shutdown(SHUT_RDWR);
2095 */
2096static void xs_tcp_shutdown(struct rpc_xprt *xprt)
2097{
2098 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2099 struct socket *sock = transport->sock;
2100
2101 if (sock == NULL)
2102 return;
2103 if (xprt_connected(xprt)) {
2104 kernel_sock_shutdown(sock, SHUT_RDWR);
2105 trace_rpc_socket_shutdown(xprt, sock);
2106 } else
2107 xs_reset_transport(transport);
2108}
2109
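
The relocated xs_tcp_shutdown() above now distinguishes a live connection, which gets a graceful shutdown(SHUT_RDWR), from a dead or still-connecting one, which is reset outright. A toy model of that branch; booleans stand in for the socket pointer and xprt_connected():

#include <stdbool.h>
#include <stdio.h>

static void tcp_shutdown(bool have_sock, bool connected)
{
        if (!have_sock)
                return;
        if (connected)
                printf("kernel_sock_shutdown(SHUT_RDWR)\n");
        else
                printf("xs_reset_transport()\n");
}

int main(void)
{
        tcp_shutdown(true, true);   /* graceful FIN exchange */
        tcp_shutdown(true, false);  /* hard reset of a dead socket */
        return 0;
}
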
2060static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2110static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2061{ 2111{
2062 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2112 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2067,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2067 unsigned int keepidle = xprt->timeout->to_initval / HZ; 2117 unsigned int keepidle = xprt->timeout->to_initval / HZ;
2068 unsigned int keepcnt = xprt->timeout->to_retries + 1; 2118 unsigned int keepcnt = xprt->timeout->to_retries + 1;
2069 unsigned int opt_on = 1; 2119 unsigned int opt_on = 1;
2120 unsigned int timeo;
2070 2121
2071 /* TCP Keepalive options */ 2122 /* TCP Keepalive options */
2072 kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, 2123 kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2078,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2078 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, 2129 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
2079 (char *)&keepcnt, sizeof(keepcnt)); 2130 (char *)&keepcnt, sizeof(keepcnt));
2080 2131
2132 /* TCP user timeout (see RFC5482) */
2133 timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
2134 (xprt->timeout->to_retries + 1);
2135 kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
2136 (char *)&timeo, sizeof(timeo));
2137
2081 write_lock_bh(&sk->sk_callback_lock); 2138 write_lock_bh(&sk->sk_callback_lock);
2082 2139
2083 xs_save_old_callbacks(transport, sk); 2140 xs_save_old_callbacks(transport, sk);
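
The new hunk above derives a TCP_USER_TIMEOUT (RFC 5482) from the RPC retry schedule, so the TCP stack abandons unacked data on roughly the timetable the RPC layer already uses. A userspace sketch of setting the option; the timeout values are examples, and the kernel converts jiffies with jiffies_to_msecs() rather than starting from milliseconds:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        unsigned int to_initval_ms = 60000;  /* e.g. 60s initial timeout */
        unsigned int to_retries = 2;
        unsigned int timeo = to_initval_ms * (to_retries + 1);
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0)
                return 1;
        /* RFC 5482: max time transmitted data may stay unacked. */
        if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
                       &timeo, sizeof(timeo)) < 0)
                perror("setsockopt");
        printf("TCP_USER_TIMEOUT = %u ms\n", timeo);
        return 0;
}
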
@@ -2125,9 +2182,6 @@ out:
2125 2182
2126/** 2183/**
2127 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint 2184 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
2128 * @xprt: RPC transport to connect
2129 * @transport: socket transport to connect
2130 * @create_sock: function to create a socket of the correct type
2131 * 2185 *
2132 * Invoked by a work queue tasklet. 2186 * Invoked by a work queue tasklet.
2133 */ 2187 */
@@ -2463,6 +2517,8 @@ static struct rpc_xprt_ops xs_local_ops = {
2463 .close = xs_close, 2517 .close = xs_close,
2464 .destroy = xs_destroy, 2518 .destroy = xs_destroy,
2465 .print_stats = xs_local_print_stats, 2519 .print_stats = xs_local_print_stats,
2520 .enable_swap = xs_enable_swap,
2521 .disable_swap = xs_disable_swap,
2466}; 2522};
2467 2523
2468static struct rpc_xprt_ops xs_udp_ops = { 2524static struct rpc_xprt_ops xs_udp_ops = {
@@ -2482,6 +2538,9 @@ static struct rpc_xprt_ops xs_udp_ops = {
2482 .close = xs_close, 2538 .close = xs_close,
2483 .destroy = xs_destroy, 2539 .destroy = xs_destroy,
2484 .print_stats = xs_udp_print_stats, 2540 .print_stats = xs_udp_print_stats,
2541 .enable_swap = xs_enable_swap,
2542 .disable_swap = xs_disable_swap,
2543 .inject_disconnect = xs_inject_disconnect,
2485}; 2544};
2486 2545
2487static struct rpc_xprt_ops xs_tcp_ops = { 2546static struct rpc_xprt_ops xs_tcp_ops = {
@@ -2498,6 +2557,9 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2498 .close = xs_tcp_shutdown, 2557 .close = xs_tcp_shutdown,
2499 .destroy = xs_destroy, 2558 .destroy = xs_destroy,
2500 .print_stats = xs_tcp_print_stats, 2559 .print_stats = xs_tcp_print_stats,
2560 .enable_swap = xs_enable_swap,
2561 .disable_swap = xs_disable_swap,
2562 .inject_disconnect = xs_inject_disconnect,
2501}; 2563};
2502 2564
2503/* 2565/*
@@ -2515,6 +2577,9 @@ static struct rpc_xprt_ops bc_tcp_ops = {
2515 .close = bc_close, 2577 .close = bc_close,
2516 .destroy = bc_destroy, 2578 .destroy = bc_destroy,
2517 .print_stats = xs_tcp_print_stats, 2579 .print_stats = xs_tcp_print_stats,
2580 .enable_swap = xs_enable_swap,
2581 .disable_swap = xs_disable_swap,
2582 .inject_disconnect = xs_inject_disconnect,
2518}; 2583};
2519 2584
2520static int xs_init_anyaddr(const int family, struct sockaddr *sap) 2585static int xs_init_anyaddr(const int family, struct sockaddr *sap)
@@ -2982,7 +3047,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
2982 RPC_MAX_RESVPORT); 3047 RPC_MAX_RESVPORT);
2983} 3048}
2984 3049
2985static struct kernel_param_ops param_ops_portnr = { 3050static const struct kernel_param_ops param_ops_portnr = {
2986 .set = param_set_portnr, 3051 .set = param_set_portnr,
2987 .get = param_get_uint, 3052 .get = param_get_uint,
2988}; 3053};
@@ -3001,7 +3066,7 @@ static int param_set_slot_table_size(const char *val,
3001 RPC_MAX_SLOT_TABLE); 3066 RPC_MAX_SLOT_TABLE);
3002} 3067}
3003 3068
3004static struct kernel_param_ops param_ops_slot_table_size = { 3069static const struct kernel_param_ops param_ops_slot_table_size = {
3005 .set = param_set_slot_table_size, 3070 .set = param_set_slot_table_size,
3006 .get = param_get_uint, 3071 .get = param_get_uint,
3007}; 3072};
@@ -3017,7 +3082,7 @@ static int param_set_max_slot_table_size(const char *val,
3017 RPC_MAX_SLOT_TABLE_LIMIT); 3082 RPC_MAX_SLOT_TABLE_LIMIT);
3018} 3083}
3019 3084
3020static struct kernel_param_ops param_ops_max_slot_table_size = { 3085static const struct kernel_param_ops param_ops_max_slot_table_size = {
3021 .set = param_set_max_slot_table_size, 3086 .set = param_set_max_slot_table_size,
3022 .get = param_get_uint, 3087 .get = param_get_uint,
3023}; 3088};
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 055453d48668..9f2add3cba26 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -15,97 +15,366 @@
15#include <linux/mutex.h> 15#include <linux/mutex.h>
16#include <linux/notifier.h> 16#include <linux/notifier.h>
17#include <linux/netdevice.h> 17#include <linux/netdevice.h>
18#include <linux/if_bridge.h>
18#include <net/ip_fib.h> 19#include <net/ip_fib.h>
19#include <net/switchdev.h> 20#include <net/switchdev.h>
20 21
21/** 22/**
22 * netdev_switch_parent_id_get - Get ID of a switch 23 * switchdev_port_attr_get - Get port attribute
24 *
23 * @dev: port device 25 * @dev: port device
24 * @psid: switch ID 26 * @attr: attribute to get
27 */
28int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
29{
30 const struct switchdev_ops *ops = dev->switchdev_ops;
31 struct net_device *lower_dev;
32 struct list_head *iter;
33 struct switchdev_attr first = {
34 .id = SWITCHDEV_ATTR_UNDEFINED
35 };
36 int err = -EOPNOTSUPP;
37
38 if (ops && ops->switchdev_port_attr_get)
39 return ops->switchdev_port_attr_get(dev, attr);
40
41 if (attr->flags & SWITCHDEV_F_NO_RECURSE)
42 return err;
43
44 /* Switch device port(s) may be stacked under
45 * bond/team/vlan dev, so recurse down to get attr on
46 * each port. Return -ENODATA if attr values don't
47 * compare across ports.
48 */
49
50 netdev_for_each_lower_dev(dev, lower_dev, iter) {
51 err = switchdev_port_attr_get(lower_dev, attr);
52 if (err)
53 break;
54 if (first.id == SWITCHDEV_ATTR_UNDEFINED)
55 first = *attr;
56 else if (memcmp(&first, attr, sizeof(*attr)))
57 return -ENODATA;
58 }
59
60 return err;
61}
62EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
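
The getter above dispatches to the port driver's switchdev_port_attr_get op before recursing over lower devices. A minimal driver-side sketch of that op, covering the two attributes this patch uses later (parent switch ID and bridge port flags); the foo_port structure and its fields are assumptions for illustration, not a real in-tree driver:

static int foo_port_attr_get(struct net_device *dev,
			     struct switchdev_attr *attr)
{
	const struct foo_port *port = netdev_priv(dev);

	switch (attr->id) {
	case SWITCHDEV_ATTR_PORT_PARENT_ID:
		/* All ports of one switch report the same ID, so
		 * switchdev_port_attr_get() can compare results while
		 * recursing over stacked devices.
		 */
		attr->u.ppid.id_len = sizeof(port->switch_id);
		memcpy(attr->u.ppid.id, &port->switch_id,
		       attr->u.ppid.id_len);
		return 0;
	case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS:
		attr->u.brport_flags = port->brport_flags;
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
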
63
64static int __switchdev_port_attr_set(struct net_device *dev,
65 struct switchdev_attr *attr)
66{
67 const struct switchdev_ops *ops = dev->switchdev_ops;
68 struct net_device *lower_dev;
69 struct list_head *iter;
70 int err = -EOPNOTSUPP;
71
72 if (ops && ops->switchdev_port_attr_set)
73 return ops->switchdev_port_attr_set(dev, attr);
74
75 if (attr->flags & SWITCHDEV_F_NO_RECURSE)
76 return err;
77
78 /* Switch device port(s) may be stacked under
79 * bond/team/vlan dev, so recurse down to set attr on
80 * each port.
81 */
82
83 netdev_for_each_lower_dev(dev, lower_dev, iter) {
84 err = __switchdev_port_attr_set(lower_dev, attr);
85 if (err)
86 break;
87 }
88
89 return err;
90}
91
92struct switchdev_attr_set_work {
93 struct work_struct work;
94 struct net_device *dev;
95 struct switchdev_attr attr;
96};
97
98static void switchdev_port_attr_set_work(struct work_struct *work)
99{
100 struct switchdev_attr_set_work *asw =
101 container_of(work, struct switchdev_attr_set_work, work);
102 int err;
103
104 rtnl_lock();
105 err = switchdev_port_attr_set(asw->dev, &asw->attr);
106 if (err && err != -EOPNOTSUPP)
107 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
108 err, asw->attr.id);
109 rtnl_unlock();
110
111 dev_put(asw->dev);
112 kfree(work);
113}
114
115static int switchdev_port_attr_set_defer(struct net_device *dev,
116 struct switchdev_attr *attr)
117{
118 struct switchdev_attr_set_work *asw;
119
120 asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
121 if (!asw)
122 return -ENOMEM;
123
124 INIT_WORK(&asw->work, switchdev_port_attr_set_work);
125
126 dev_hold(dev);
127 asw->dev = dev;
128 memcpy(&asw->attr, attr, sizeof(asw->attr));
129
130 schedule_work(&asw->work);
131
132 return 0;
133}
134
135/**
136 * switchdev_port_attr_set - Set port attribute
137 *
138 * @dev: port device
139 * @attr: attribute to set
25 * 140 *
26 * Get ID of a switch this port is part of. 141 * Use a 2-phase prepare-commit transaction model to ensure
142 * the system is not left in a partially updated state due to
143 * failure from driver/device.
27 */ 144 */
28int netdev_switch_parent_id_get(struct net_device *dev, 145int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
29 struct netdev_phys_item_id *psid)
30{ 146{
31 const struct swdev_ops *ops = dev->swdev_ops; 147 int err;
148
149 if (!rtnl_is_locked()) {
150 /* Running prepare-commit transaction across stacked
151 * devices requires nothing moves, so if rtnl_lock is
152 * not held, schedule a worker thread to hold rtnl_lock
153 * while setting attr.
154 */
155
156 return switchdev_port_attr_set_defer(dev, attr);
157 }
158
159 /* Phase I: prepare for attr set. Driver/device should fail
160 * here if there are going to be issues in the commit phase,
161 * such as lack of resources or support. The driver/device
162 * should reserve resources needed for the commit phase here,
163 * but should not commit the attr.
164 */
32 165
33 if (!ops || !ops->swdev_parent_id_get) 166 attr->trans = SWITCHDEV_TRANS_PREPARE;
34 return -EOPNOTSUPP; 167 err = __switchdev_port_attr_set(dev, attr);
35 return ops->swdev_parent_id_get(dev, psid); 168 if (err) {
169 /* Prepare phase failed: abort the transaction. Any
170 * resources reserved in the prepare phase are
171 * released.
172 */
173
174 if (err != -EOPNOTSUPP) {
175 attr->trans = SWITCHDEV_TRANS_ABORT;
176 __switchdev_port_attr_set(dev, attr);
177 }
178
179 return err;
180 }
181
182 /* Phase II: commit attr set. This cannot fail as a fault
183 * of driver/device. If it does, it's a bug in the driver/device
184 * because the driver said everything was OK in phase I.
185 */
186
187 attr->trans = SWITCHDEV_TRANS_COMMIT;
188 err = __switchdev_port_attr_set(dev, attr);
189 WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
190 dev->name, attr->id);
191
192 return err;
193}
194EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
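
On the driver side, the same handler sees each phase via attr->trans. A sketch of how a port driver might honor the contract described in the comments above; the foo_* names and helpers are hypothetical:

static int foo_port_attr_set(struct net_device *dev,
			     struct switchdev_attr *attr)
{
	struct foo_port *port = netdev_priv(dev);

	switch (attr->trans) {
	case SWITCHDEV_TRANS_PREPARE:
		/* Fail here, cheaply, if the commit could not succeed;
		 * reserve any resources the commit phase will need.
		 */
		return foo_port_attr_reserve(port, attr);
	case SWITCHDEV_TRANS_ABORT:
		/* Some device in the stack failed prepare: release
		 * whatever this port reserved.
		 */
		foo_port_attr_release(port, attr);
		return 0;
	case SWITCHDEV_TRANS_COMMIT:
		/* Must not fail; everything was reserved in prepare. */
		foo_port_attr_apply(port, attr);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
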
195
196static int __switchdev_port_obj_add(struct net_device *dev,
197 struct switchdev_obj *obj)
198{
199 const struct switchdev_ops *ops = dev->switchdev_ops;
200 struct net_device *lower_dev;
201 struct list_head *iter;
202 int err = -EOPNOTSUPP;
203
204 if (ops && ops->switchdev_port_obj_add)
205 return ops->switchdev_port_obj_add(dev, obj);
206
207 /* Switch device port(s) may be stacked under
208 * bond/team/vlan dev, so recurse down to add object on
209 * each port.
210 */
211
212 netdev_for_each_lower_dev(dev, lower_dev, iter) {
213 err = __switchdev_port_obj_add(lower_dev, obj);
214 if (err)
215 break;
216 }
217
218 return err;
36} 219}
37EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
38 220
39/** 221/**
40 * netdev_switch_port_stp_update - Notify switch device port of STP 222 * switchdev_port_obj_add - Add port object
41 * state change 223 *
42 * @dev: port device 224 * @dev: port device
43 * @state: port STP state 225 * @obj: object to add
226 *
227 * Use a 2-phase prepare-commit transaction model to ensure
228 * the system is not left in a partially updated state due to
229 * failure from driver/device.
230 *
231 * rtnl_lock must be held.
232 */
233int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
234{
235 int err;
236
237 ASSERT_RTNL();
238
239 /* Phase I: prepare for obj add. Driver/device should fail
240 * here if there are going to be issues in the commit phase,
241 * such as lack of resources or support. The driver/device
242 * should reserve resources needed for the commit phase here,
243 * but should not commit the obj.
244 */
245
246 obj->trans = SWITCHDEV_TRANS_PREPARE;
247 err = __switchdev_port_obj_add(dev, obj);
248 if (err) {
249 /* Prepare phase failed: abort the transaction. Any
250 * resources reserved in the prepare phase are
251 * released.
252 */
253
254 if (err != -EOPNOTSUPP) {
255 obj->trans = SWITCHDEV_TRANS_ABORT;
256 __switchdev_port_obj_add(dev, obj);
257 }
258
259 return err;
260 }
261
262 /* Phase II: commit obj add. This cannot fail as a fault
263 * of driver/device. If it does, it's a bug in the driver/device
264 * because the driver said everything was OK in phase I.
265 */
266
267 obj->trans = SWITCHDEV_TRANS_COMMIT;
268 err = __switchdev_port_obj_add(dev, obj);
269 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
270
271 return err;
272}
273EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
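
From the caller's side of the transaction above, a minimal sketch that programs a VLAN range on a port (rtnl_lock held, as the kerneldoc requires; the VID values are arbitrary):

static int foo_port_add_vlans(struct net_device *dev)
{
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_VLAN,
		.u.vlan = {
			.flags = BRIDGE_VLAN_INFO_UNTAGGED,
			.vid_begin = 100,
			.vid_end = 110,
		},
	};

	ASSERT_RTNL();

	/* Runs prepare on every port in the stack, then commit. */
	return switchdev_port_obj_add(dev, &obj);
}
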
274
275/**
276 * switchdev_port_obj_del - Delete port object
44 * 277 *
45 * Notify switch device port of bridge port STP state change. 278 * @dev: port device
279 * @obj: object to delete
46 */ 280 */
47int netdev_switch_port_stp_update(struct net_device *dev, u8 state) 281int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
48{ 282{
49 const struct swdev_ops *ops = dev->swdev_ops; 283 const struct switchdev_ops *ops = dev->switchdev_ops;
50 struct net_device *lower_dev; 284 struct net_device *lower_dev;
51 struct list_head *iter; 285 struct list_head *iter;
52 int err = -EOPNOTSUPP; 286 int err = -EOPNOTSUPP;
53 287
54 if (ops && ops->swdev_port_stp_update) 288 if (ops && ops->switchdev_port_obj_del)
55 return ops->swdev_port_stp_update(dev, state); 289 return ops->switchdev_port_obj_del(dev, obj);
290
291 /* Switch device port(s) may be stacked under
292 * bond/team/vlan dev, so recurse down to delete object on
293 * each port.
294 */
56 295
57 netdev_for_each_lower_dev(dev, lower_dev, iter) { 296 netdev_for_each_lower_dev(dev, lower_dev, iter) {
58 err = netdev_switch_port_stp_update(lower_dev, state); 297 err = switchdev_port_obj_del(lower_dev, obj);
59 if (err && err != -EOPNOTSUPP) 298 if (err)
60 return err; 299 break;
61 } 300 }
62 301
63 return err; 302 return err;
64} 303}
65EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); 304EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
66 305
67static DEFINE_MUTEX(netdev_switch_mutex); 306/**
68static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); 307 * switchdev_port_obj_dump - Dump port objects
308 *
309 * @dev: port device
310 * @obj: object to dump
311 */
312int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
313{
314 const struct switchdev_ops *ops = dev->switchdev_ops;
315 struct net_device *lower_dev;
316 struct list_head *iter;
317 int err = -EOPNOTSUPP;
318
319 if (ops && ops->switchdev_port_obj_dump)
320 return ops->switchdev_port_obj_dump(dev, obj);
321
322 /* Switch device port(s) may be stacked under
323 * bond/team/vlan dev, so recurse down to dump objects on
324 * first port at bottom of stack.
325 */
326
327 netdev_for_each_lower_dev(dev, lower_dev, iter) {
328 err = switchdev_port_obj_dump(lower_dev, obj);
329 break;
330 }
331
332 return err;
333}
334EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
335
336static DEFINE_MUTEX(switchdev_mutex);
337static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
69 338
70/** 339/**
71 * register_netdev_switch_notifier - Register notifier 340 * register_switchdev_notifier - Register notifier
72 * @nb: notifier_block 341 * @nb: notifier_block
73 * 342 *
74 * Register switch device notifier. This should be used by code 343 * Register switch device notifier. This should be used by code
75 * which needs to monitor events happening in a particular device. 344 * which needs to monitor events happening in a particular device.
76 * Return values are the same as for atomic_notifier_chain_register(). 345 * Return values are the same as for atomic_notifier_chain_register().
77 */ 346 */
78int register_netdev_switch_notifier(struct notifier_block *nb) 347int register_switchdev_notifier(struct notifier_block *nb)
79{ 348{
80 int err; 349 int err;
81 350
82 mutex_lock(&netdev_switch_mutex); 351 mutex_lock(&switchdev_mutex);
83 err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); 352 err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
84 mutex_unlock(&netdev_switch_mutex); 353 mutex_unlock(&switchdev_mutex);
85 return err; 354 return err;
86} 355}
87EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); 356EXPORT_SYMBOL_GPL(register_switchdev_notifier);
88 357
89/** 358/**
90 * unregister_netdev_switch_notifier - Unregister notifier 359 * unregister_switchdev_notifier - Unregister notifier
91 * @nb: notifier_block 360 * @nb: notifier_block
92 * 361 *
93 * Unregister switch device notifier. 362 * Unregister switch device notifier.
94 * Return values are the same as for atomic_notifier_chain_unregister(). 363 * Return values are the same as for atomic_notifier_chain_unregister().
95 */ 364 */
96int unregister_netdev_switch_notifier(struct notifier_block *nb) 365int unregister_switchdev_notifier(struct notifier_block *nb)
97{ 366{
98 int err; 367 int err;
99 368
100 mutex_lock(&netdev_switch_mutex); 369 mutex_lock(&switchdev_mutex);
101 err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); 370 err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
102 mutex_unlock(&netdev_switch_mutex); 371 mutex_unlock(&switchdev_mutex);
103 return err; 372 return err;
104} 373}
105EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); 374EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
106 375
107/** 376/**
108 * call_netdev_switch_notifiers - Call notifiers 377 * call_switchdev_notifiers - Call notifiers
109 * @val: value passed unmodified to notifier function 378 * @val: value passed unmodified to notifier function
110 * @dev: port device 379 * @dev: port device
111 * @info: notifier information data 380 * @info: notifier information data
@@ -114,146 +383,502 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
114 * when it needs to propagate hardware event. 383 * when it needs to propagate hardware event.
115 * Return values are the same as for atomic_notifier_call_chain(). 384 * Return values are the same as for atomic_notifier_call_chain().
116 */ 385 */
117int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, 386int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
118 struct netdev_switch_notifier_info *info) 387 struct switchdev_notifier_info *info)
119{ 388{
120 int err; 389 int err;
121 390
122 info->dev = dev; 391 info->dev = dev;
123 mutex_lock(&netdev_switch_mutex); 392 mutex_lock(&switchdev_mutex);
124 err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); 393 err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
125 mutex_unlock(&netdev_switch_mutex); 394 mutex_unlock(&switchdev_mutex);
395 return err;
396}
397EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
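
A sketch of a consumer of this chain; the FDB event type and the info struct layout follow the switchdev header of this series, so treat the exact names as assumptions:

static int foo_switchdev_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct switchdev_notifier_info *info = ptr;

	if (event == SWITCHDEV_FDB_ADD)
		netdev_dbg(info->dev, "FDB entry learned in hardware\n");

	return NOTIFY_DONE;
}

static struct notifier_block foo_switchdev_nb = {
	.notifier_call = foo_switchdev_event,
};

/* module init:  register_switchdev_notifier(&foo_switchdev_nb);
 * module exit:  unregister_switchdev_notifier(&foo_switchdev_nb);
 */
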
398
399struct switchdev_vlan_dump {
400 struct switchdev_obj obj;
401 struct sk_buff *skb;
402 u32 filter_mask;
403 u16 flags;
404 u16 begin;
405 u16 end;
406};
407
408static int switchdev_port_vlan_dump_put(struct net_device *dev,
409 struct switchdev_vlan_dump *dump)
410{
411 struct bridge_vlan_info vinfo;
412
413 vinfo.flags = dump->flags;
414
415 if (dump->begin == 0 && dump->end == 0) {
416 return 0;
417 } else if (dump->begin == dump->end) {
418 vinfo.vid = dump->begin;
419 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
420 sizeof(vinfo), &vinfo))
421 return -EMSGSIZE;
422 } else {
423 vinfo.vid = dump->begin;
424 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
425 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
426 sizeof(vinfo), &vinfo))
427 return -EMSGSIZE;
428 vinfo.vid = dump->end;
429 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
430 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
431 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
432 sizeof(vinfo), &vinfo))
433 return -EMSGSIZE;
434 }
435
436 return 0;
437}
438
439static int switchdev_port_vlan_dump_cb(struct net_device *dev,
440 struct switchdev_obj *obj)
441{
442 struct switchdev_vlan_dump *dump =
443 container_of(obj, struct switchdev_vlan_dump, obj);
444 struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
445 int err = 0;
446
447 if (vlan->vid_begin > vlan->vid_end)
448 return -EINVAL;
449
450 if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
451 dump->flags = vlan->flags;
452 for (dump->begin = dump->end = vlan->vid_begin;
453 dump->begin <= vlan->vid_end;
454 dump->begin++, dump->end++) {
455 err = switchdev_port_vlan_dump_put(dev, dump);
456 if (err)
457 return err;
458 }
459 } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
460 if (dump->begin > vlan->vid_begin &&
461 dump->begin >= vlan->vid_end) {
462 if ((dump->begin - 1) == vlan->vid_end &&
463 dump->flags == vlan->flags) {
464 /* prepend */
465 dump->begin = vlan->vid_begin;
466 } else {
467 err = switchdev_port_vlan_dump_put(dev, dump);
468 dump->flags = vlan->flags;
469 dump->begin = vlan->vid_begin;
470 dump->end = vlan->vid_end;
471 }
472 } else if (dump->end <= vlan->vid_begin &&
473 dump->end < vlan->vid_end) {
474 if ((dump->end + 1) == vlan->vid_begin &&
475 dump->flags == vlan->flags) {
476 /* append */
477 dump->end = vlan->vid_end;
478 } else {
479 err = switchdev_port_vlan_dump_put(dev, dump);
480 dump->flags = vlan->flags;
481 dump->begin = vlan->vid_begin;
482 dump->end = vlan->vid_end;
483 }
484 } else {
485 err = -EINVAL;
486 }
487 }
488
126 return err; 489 return err;
127} 490}
128EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); 491
492static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
493 u32 filter_mask)
494{
495 struct switchdev_vlan_dump dump = {
496 .obj = {
497 .id = SWITCHDEV_OBJ_PORT_VLAN,
498 .cb = switchdev_port_vlan_dump_cb,
499 },
500 .skb = skb,
501 .filter_mask = filter_mask,
502 };
503 int err = 0;
504
505 if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
506 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
507 err = switchdev_port_obj_dump(dev, &dump.obj);
508 if (err)
509 goto err_out;
510 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
511 /* last one */
512 err = switchdev_port_vlan_dump_put(dev, &dump);
513 }
514
515err_out:
516 return err == -EOPNOTSUPP ? 0 : err;
517}
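
To make the compressed path concrete: if the driver reports VIDs 10, 11 and 12 back-to-back with identical flags, switchdev_port_vlan_dump_cb() coalesces them, and the dump emits a single range instead of three entries. A sketch of the resulting attributes, under that assumption:

/* IFLA_BRIDGE_VLAN_INFO { .vid = 10, .flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN }
 * IFLA_BRIDGE_VLAN_INFO { .vid = 12, .flags |= BRIDGE_VLAN_INFO_RANGE_END }
 *
 * A non-adjacent VID, or one with different flags, first flushes the
 * pending range via switchdev_port_vlan_dump_put() and then starts a
 * new one; the "last one" call in switchdev_port_vlan_fill() flushes
 * whatever range is still pending at the end of the walk.
 */
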
129 518
130/** 519/**
131 * netdev_switch_port_bridge_setlink - Notify switch device port of bridge 520 * switchdev_port_bridge_getlink - Get bridge port attributes
132 * port attributes
133 * 521 *
134 * @dev: port device 522 * @dev: port device
135 * @nlh: netlink msg with bridge port attributes
136 * @flags: bridge setlink flags
137 * 523 *
138 * Notify switch device port of bridge port attributes 524 * Called for SELF on rtnl_bridge_getlink to get bridge port
525 * attributes.
139 */ 526 */
140int netdev_switch_port_bridge_setlink(struct net_device *dev, 527int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
141 struct nlmsghdr *nlh, u16 flags) 528 struct net_device *dev, u32 filter_mask,
529 int nlflags)
142{ 530{
143 const struct net_device_ops *ops = dev->netdev_ops; 531 struct switchdev_attr attr = {
532 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
533 };
534 u16 mode = BRIDGE_MODE_UNDEF;
535 u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
536 int err;
144 537
145 if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 538 err = switchdev_port_attr_get(dev, &attr);
146 return 0; 539 if (err && err != -EOPNOTSUPP)
540 return err;
541
542 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
543 attr.u.brport_flags, mask, nlflags,
544 filter_mask, switchdev_port_vlan_fill);
545}
546EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
547
548static int switchdev_port_br_setflag(struct net_device *dev,
549 struct nlattr *nlattr,
550 unsigned long brport_flag)
551{
552 struct switchdev_attr attr = {
553 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
554 };
555 u8 flag = nla_get_u8(nlattr);
556 int err;
557
558 err = switchdev_port_attr_get(dev, &attr);
559 if (err)
560 return err;
561
562 if (flag)
563 attr.u.brport_flags |= brport_flag;
564 else
565 attr.u.brport_flags &= ~brport_flag;
566
567 return switchdev_port_attr_set(dev, &attr);
568}
569
570static const struct nla_policy
571switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
572 [IFLA_BRPORT_STATE] = { .type = NLA_U8 },
573 [IFLA_BRPORT_COST] = { .type = NLA_U32 },
574 [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
575 [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
576 [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
577 [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
578 [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 },
579 [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
580 [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 },
581 [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
582};
583
584static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
585 struct nlattr *protinfo)
586{
587 struct nlattr *attr;
588 int rem;
589 int err;
590
591 err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
592 switchdev_port_bridge_policy);
593 if (err)
594 return err;
595
596 nla_for_each_nested(attr, protinfo, rem) {
597 switch (nla_type(attr)) {
598 case IFLA_BRPORT_LEARNING:
599 err = switchdev_port_br_setflag(dev, attr,
600 BR_LEARNING);
601 break;
602 case IFLA_BRPORT_LEARNING_SYNC:
603 err = switchdev_port_br_setflag(dev, attr,
604 BR_LEARNING_SYNC);
605 break;
606 default:
607 err = -EOPNOTSUPP;
608 break;
609 }
610 if (err)
611 return err;
612 }
613
614 return 0;
615}
616
617static int switchdev_port_br_afspec(struct net_device *dev,
618 struct nlattr *afspec,
619 int (*f)(struct net_device *dev,
620 struct switchdev_obj *obj))
621{
622 struct nlattr *attr;
623 struct bridge_vlan_info *vinfo;
624 struct switchdev_obj obj = {
625 .id = SWITCHDEV_OBJ_PORT_VLAN,
626 };
627 struct switchdev_obj_vlan *vlan = &obj.u.vlan;
628 int rem;
629 int err;
147 630
148 if (!ops->ndo_bridge_setlink) 631 nla_for_each_nested(attr, afspec, rem) {
149 return -EOPNOTSUPP; 632 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
633 continue;
634 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
635 return -EINVAL;
636 vinfo = nla_data(attr);
637 vlan->flags = vinfo->flags;
638 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
639 if (vlan->vid_begin)
640 return -EINVAL;
641 vlan->vid_begin = vinfo->vid;
642 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
643 if (!vlan->vid_begin)
644 return -EINVAL;
645 vlan->vid_end = vinfo->vid;
646 if (vlan->vid_end <= vlan->vid_begin)
647 return -EINVAL;
648 err = f(dev, &obj);
649 if (err)
650 return err;
651 memset(vlan, 0, sizeof(*vlan));
652 } else {
653 if (vlan->vid_begin)
654 return -EINVAL;
655 vlan->vid_begin = vinfo->vid;
656 vlan->vid_end = vinfo->vid;
657 err = f(dev, &obj);
658 if (err)
659 return err;
660 memset(vlan, 0, sizeof(*vlan));
661 }
662 }
150 663
151 return ops->ndo_bridge_setlink(dev, nlh, flags); 664 return 0;
152} 665}
153EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink);
154 666
155/** 667/**
156 * netdev_switch_port_bridge_dellink - Notify switch device port of bridge 668 * switchdev_port_bridge_setlink - Set bridge port attributes
157 * port attribute delete
158 * 669 *
159 * @dev: port device 670 * @dev: port device
160 * @nlh: netlink msg with bridge port attributes 671 * @nlh: netlink header
161 * @flags: bridge setlink flags 672 * @flags: netlink flags
162 * 673 *
163 * Notify switch device port of bridge port attribute delete 674 * Called for SELF on rtnl_bridge_setlink to set bridge port
675 * attributes.
164 */ 676 */
165int netdev_switch_port_bridge_dellink(struct net_device *dev, 677int switchdev_port_bridge_setlink(struct net_device *dev,
166 struct nlmsghdr *nlh, u16 flags) 678 struct nlmsghdr *nlh, u16 flags)
167{ 679{
168 const struct net_device_ops *ops = dev->netdev_ops; 680 struct nlattr *protinfo;
681 struct nlattr *afspec;
682 int err = 0;
169 683
170 if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 684 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
171 return 0; 685 IFLA_PROTINFO);
686 if (protinfo) {
687 err = switchdev_port_br_setlink_protinfo(dev, protinfo);
688 if (err)
689 return err;
690 }
172 691
173 if (!ops->ndo_bridge_dellink) 692 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
174 return -EOPNOTSUPP; 693 IFLA_AF_SPEC);
694 if (afspec)
695 err = switchdev_port_br_afspec(dev, afspec,
696 switchdev_port_obj_add);
175 697
176 return ops->ndo_bridge_dellink(dev, nlh, flags); 698 return err;
177} 699}
178EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); 700EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
179 701
180/** 702/**
181 * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink 703 * switchdev_port_bridge_dellink - Del bridge port attributes
182 * op for master devices
183 * 704 *
184 * @dev: port device 705 * @dev: port device
185 * @nlh: netlink msg with bridge port attributes 706 * @nlh: netlink header
186 * @flags: bridge setlink flags 707 * @flags: netlink flags
187 * 708 *
188 * Notify master device slaves of bridge port attributes 709 * Called for SELF on rtnl_bridge_dellink to del bridge port
710 * attributes.
189 */ 711 */
190int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, 712int switchdev_port_bridge_dellink(struct net_device *dev,
191 struct nlmsghdr *nlh, u16 flags) 713 struct nlmsghdr *nlh, u16 flags)
192{ 714{
193 struct net_device *lower_dev; 715 struct nlattr *afspec;
194 struct list_head *iter;
195 int ret = 0, err = 0;
196 716
197 if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 717 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
198 return ret; 718 IFLA_AF_SPEC);
719 if (afspec)
720 return switchdev_port_br_afspec(dev, afspec,
721 switchdev_port_obj_del);
199 722
200 netdev_for_each_lower_dev(dev, lower_dev, iter) { 723 return 0;
201 err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); 724}
202 if (err && err != -EOPNOTSUPP) 725EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
203 ret = err; 726
204 } 727/**
728 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
729 *
730 * @ndm: netlink hdr
731 * @tb: netlink attributes
732 * @dev: port device
733 * @addr: MAC address to add
734 * @vid: VLAN to add
735 *
736 * Add FDB entry to switch device.
737 */
738int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
739 struct net_device *dev, const unsigned char *addr,
740 u16 vid, u16 nlm_flags)
741{
742 struct switchdev_obj obj = {
743 .id = SWITCHDEV_OBJ_PORT_FDB,
744 .u.fdb = {
745 .addr = addr,
746 .vid = vid,
747 },
748 };
205 749
206 return ret; 750 return switchdev_port_obj_add(dev, &obj);
207} 751}
208EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); 752EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
209 753
210/** 754/**
211 * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink 755 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
212 * op for master devices
213 * 756 *
757 * @ndm: netlink hdr
758 * @tb: netlink attributes
214 * @dev: port device 759 * @dev: port device
215 * @nlh: netlink msg with bridge port attributes 760 * @addr: MAC address to delete
216 * @flags: bridge dellink flags 761 * @vid: VLAN to delete
217 * 762 *
218 * Notify master device slaves of bridge port attribute deletes 763 * Delete FDB entry from switch device.
219 */ 764 */
220int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, 765int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
221 struct nlmsghdr *nlh, u16 flags) 766 struct net_device *dev, const unsigned char *addr,
767 u16 vid)
222{ 768{
223 struct net_device *lower_dev; 769 struct switchdev_obj obj = {
224 struct list_head *iter; 770 .id = SWITCHDEV_OBJ_PORT_FDB,
225 int ret = 0, err = 0; 771 .u.fdb = {
772 .addr = addr,
773 .vid = vid,
774 },
775 };
226 776
227 if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 777 return switchdev_port_obj_del(dev, &obj);
228 return ret; 778}
779EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
229 780
230 netdev_for_each_lower_dev(dev, lower_dev, iter) { 781struct switchdev_fdb_dump {
231 err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); 782 struct switchdev_obj obj;
232 if (err && err != -EOPNOTSUPP) 783 struct sk_buff *skb;
233 ret = err; 784 struct netlink_callback *cb;
234 } 785 int idx;
786};
787
788static int switchdev_port_fdb_dump_cb(struct net_device *dev,
789 struct switchdev_obj *obj)
790{
791 struct switchdev_fdb_dump *dump =
792 container_of(obj, struct switchdev_fdb_dump, obj);
793 u32 portid = NETLINK_CB(dump->cb->skb).portid;
794 u32 seq = dump->cb->nlh->nlmsg_seq;
795 struct nlmsghdr *nlh;
796 struct ndmsg *ndm;
797
798 if (dump->idx < dump->cb->args[0])
799 goto skip;
800
801 nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
802 sizeof(*ndm), NLM_F_MULTI);
803 if (!nlh)
804 return -EMSGSIZE;
805
806 ndm = nlmsg_data(nlh);
807 ndm->ndm_family = AF_BRIDGE;
808 ndm->ndm_pad1 = 0;
809 ndm->ndm_pad2 = 0;
810 ndm->ndm_flags = NTF_SELF;
811 ndm->ndm_type = 0;
812 ndm->ndm_ifindex = dev->ifindex;
813 ndm->ndm_state = NUD_REACHABLE;
814
815 if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
816 goto nla_put_failure;
817
818 if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
819 goto nla_put_failure;
820
821 nlmsg_end(dump->skb, nlh);
822
823skip:
824 dump->idx++;
825 return 0;
826
827nla_put_failure:
828 nlmsg_cancel(dump->skb, nlh);
829 return -EMSGSIZE;
830}
235 831
236 return ret; 832/**
833 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
834 *
835 * @skb: netlink skb
836 * @cb: netlink callback
837 * @dev: port device
838 * @filter_dev: filter device
839 * @idx: index to start dumping at
840 *
841 * Dump FDB entries from switch device.
842 */
843int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
844 struct net_device *dev,
845 struct net_device *filter_dev, int idx)
846{
847 struct switchdev_fdb_dump dump = {
848 .obj = {
849 .id = SWITCHDEV_OBJ_PORT_FDB,
850 .cb = switchdev_port_fdb_dump_cb,
851 },
852 .skb = skb,
853 .cb = cb,
854 .idx = idx,
855 };
856 int err;
857
858 err = switchdev_port_obj_dump(dev, &dump.obj);
859 if (err)
860 return err;
861
862 return dump.idx;
237} 863}
238EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink); 864EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
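
Taken together, the bridge and FDB helpers are meant to be plugged straight into a port driver's net_device_ops so the SELF paths of the rtnetlink bridge/neighbour code land in switchdev. A sketch; the foo_ ops table is illustrative, though it matches the pattern the rocker driver adopts in this series:

static const struct net_device_ops foo_port_netdev_ops = {
	/* other ndo ops elided */
	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
	.ndo_fdb_add		= switchdev_port_fdb_add,
	.ndo_fdb_del		= switchdev_port_fdb_del,
	.ndo_fdb_dump		= switchdev_port_fdb_dump,
};
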
239 865
240static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) 866static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
241{ 867{
242 const struct swdev_ops *ops = dev->swdev_ops; 868 const struct switchdev_ops *ops = dev->switchdev_ops;
243 struct net_device *lower_dev; 869 struct net_device *lower_dev;
244 struct net_device *port_dev; 870 struct net_device *port_dev;
245 struct list_head *iter; 871 struct list_head *iter;
246 872
247 /* Recursively search down until we find a sw port dev. 873 /* Recursively search down until we find a sw port dev.
248 * (A sw port dev supports swdev_parent_id_get). 874 * (A sw port dev supports switchdev_port_attr_get).
249 */ 875 */
250 876
251 if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && 877 if (ops && ops->switchdev_port_attr_get)
252 ops && ops->swdev_parent_id_get)
253 return dev; 878 return dev;
254 879
255 netdev_for_each_lower_dev(dev, lower_dev, iter) { 880 netdev_for_each_lower_dev(dev, lower_dev, iter) {
256 port_dev = netdev_switch_get_lowest_dev(lower_dev); 881 port_dev = switchdev_get_lowest_dev(lower_dev);
257 if (port_dev) 882 if (port_dev)
258 return port_dev; 883 return port_dev;
259 } 884 }
@@ -261,10 +886,12 @@ static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
261 return NULL; 886 return NULL;
262} 887}
263 888
264static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) 889static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
265{ 890{
266 struct netdev_phys_item_id psid; 891 struct switchdev_attr attr = {
267 struct netdev_phys_item_id prev_psid; 892 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
893 };
894 struct switchdev_attr prev_attr;
268 struct net_device *dev = NULL; 895 struct net_device *dev = NULL;
269 int nhsel; 896 int nhsel;
270 897
@@ -276,28 +903,29 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
276 if (!nh->nh_dev) 903 if (!nh->nh_dev)
277 return NULL; 904 return NULL;
278 905
279 dev = netdev_switch_get_lowest_dev(nh->nh_dev); 906 dev = switchdev_get_lowest_dev(nh->nh_dev);
280 if (!dev) 907 if (!dev)
281 return NULL; 908 return NULL;
282 909
283 if (netdev_switch_parent_id_get(dev, &psid)) 910 if (switchdev_port_attr_get(dev, &attr))
284 return NULL; 911 return NULL;
285 912
286 if (nhsel > 0) { 913 if (nhsel > 0) {
287 if (prev_psid.id_len != psid.id_len) 914 if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
288 return NULL; 915 return NULL;
289 if (memcmp(prev_psid.id, psid.id, psid.id_len)) 916 if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
917 attr.u.ppid.id_len))
290 return NULL; 918 return NULL;
291 } 919 }
292 920
293 prev_psid = psid; 921 prev_attr = attr;
294 } 922 }
295 923
296 return dev; 924 return dev;
297} 925}
298 926
299/** 927/**
300 * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch 928 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
301 * 929 *
302 * @dst: route's IPv4 destination address 930 * @dst: route's IPv4 destination address
303 * @dst_len: destination address length (prefix length) 931 * @dst_len: destination address length (prefix length)
@@ -307,13 +935,24 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
307 * @nlflags: netlink flags passed in (NLM_F_*) 935 * @nlflags: netlink flags passed in (NLM_F_*)
308 * @tb_id: route table ID 936 * @tb_id: route table ID
309 * 937 *
310 * Add IPv4 route entry to switch device. 938 * Add/modify switch IPv4 route entry.
311 */ 939 */
312int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, 940int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
313 u8 tos, u8 type, u32 nlflags, u32 tb_id) 941 u8 tos, u8 type, u32 nlflags, u32 tb_id)
314{ 942{
943 struct switchdev_obj fib_obj = {
944 .id = SWITCHDEV_OBJ_IPV4_FIB,
945 .u.ipv4_fib = {
946 .dst = dst,
947 .dst_len = dst_len,
948 .fi = fi,
949 .tos = tos,
950 .type = type,
951 .nlflags = nlflags,
952 .tb_id = tb_id,
953 },
954 };
315 struct net_device *dev; 955 struct net_device *dev;
316 const struct swdev_ops *ops;
317 int err = 0; 956 int err = 0;
318 957
319 /* Don't offload route if using custom ip rules or if 958 /* Don't offload route if using custom ip rules or if
@@ -328,25 +967,20 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
328 if (fi->fib_net->ipv4.fib_offload_disabled) 967 if (fi->fib_net->ipv4.fib_offload_disabled)
329 return 0; 968 return 0;
330 969
331 dev = netdev_switch_get_dev_by_nhs(fi); 970 dev = switchdev_get_dev_by_nhs(fi);
332 if (!dev) 971 if (!dev)
333 return 0; 972 return 0;
334 ops = dev->swdev_ops;
335
336 if (ops->swdev_fib_ipv4_add) {
337 err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len,
338 fi, tos, type, nlflags,
339 tb_id);
340 if (!err)
341 fi->fib_flags |= RTNH_F_OFFLOAD;
342 }
343 973
344 return err; 974 err = switchdev_port_obj_add(dev, &fib_obj);
975 if (!err)
976 fi->fib_flags |= RTNH_F_OFFLOAD;
977
978 return err == -EOPNOTSUPP ? 0 : err;
345} 979}
346EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); 980EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
347 981
348/** 982/**
349 * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch 983 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
350 * 984 *
351 * @dst: route's IPv4 destination address 985 * @dst: route's IPv4 destination address
352 * @dst_len: destination address length (prefix length) 986 * @dst_len: destination address length (prefix length)
@@ -357,38 +991,45 @@ EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add);
357 * 991 *
358 * Delete IPv4 route entry from switch device. 992 * Delete IPv4 route entry from switch device.
359 */ 993 */
360int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, 994int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
361 u8 tos, u8 type, u32 tb_id) 995 u8 tos, u8 type, u32 tb_id)
362{ 996{
997 struct switchdev_obj fib_obj = {
998 .id = SWITCHDEV_OBJ_IPV4_FIB,
999 .u.ipv4_fib = {
1000 .dst = dst,
1001 .dst_len = dst_len,
1002 .fi = fi,
1003 .tos = tos,
1004 .type = type,
1005 .nlflags = 0,
1006 .tb_id = tb_id,
1007 },
1008 };
363 struct net_device *dev; 1009 struct net_device *dev;
364 const struct swdev_ops *ops;
365 int err = 0; 1010 int err = 0;
366 1011
367 if (!(fi->fib_flags & RTNH_F_OFFLOAD)) 1012 if (!(fi->fib_flags & RTNH_F_OFFLOAD))
368 return 0; 1013 return 0;
369 1014
370 dev = netdev_switch_get_dev_by_nhs(fi); 1015 dev = switchdev_get_dev_by_nhs(fi);
371 if (!dev) 1016 if (!dev)
372 return 0; 1017 return 0;
373 ops = dev->swdev_ops;
374 1018
375 if (ops->swdev_fib_ipv4_del) { 1019 err = switchdev_port_obj_del(dev, &fib_obj);
376 err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, 1020 if (!err)
377 fi, tos, type, tb_id); 1021 fi->fib_flags &= ~RTNH_F_OFFLOAD;
378 if (!err)
379 fi->fib_flags &= ~RTNH_F_OFFLOAD;
380 }
381 1022
382 return err; 1023 return err == -EOPNOTSUPP ? 0 : err;
383} 1024}
384EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del); 1025EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
385 1026
386/** 1027/**
387 * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation 1028 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
388 * 1029 *
389 * @fi: route FIB info structure 1030 * @fi: route FIB info structure
390 */ 1031 */
391void netdev_switch_fib_ipv4_abort(struct fib_info *fi) 1032void switchdev_fib_ipv4_abort(struct fib_info *fi)
392{ 1033{
393 /* There was a problem installing this route to the offload 1034 /* There was a problem installing this route to the offload
394 * device. For now, until we come up with more refined 1035 * device. For now, until we come up with more refined
@@ -401,4 +1042,4 @@ void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
401 fib_flush_external(fi->fib_net); 1042 fib_flush_external(fi->fib_net);
402 fi->fib_net->ipv4.fib_offload_disabled = true; 1043 fi->fib_net->ipv4.fib_offload_disabled = true;
403} 1044}
404EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort); 1045EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
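
With routes now expressed as SWITCHDEV_OBJ_IPV4_FIB objects, a port driver needs only one add handler for VLANs, FDB entries and routes alike. A dispatch sketch under the same transaction rules as attrs; all foo_* helpers are hypothetical:

static int foo_port_obj_add(struct net_device *dev,
			    struct switchdev_obj *obj)
{
	switch (obj->trans) {
	case SWITCHDEV_TRANS_PREPARE:
		/* Validate and reserve; nothing is applied yet. */
		return foo_port_obj_reserve(dev, obj);
	case SWITCHDEV_TRANS_ABORT:
		foo_port_obj_release(dev, obj);
		return 0;
	default:
		break;
	}

	/* Commit phase: apply the object to the device. */
	switch (obj->id) {
	case SWITCHDEV_OBJ_PORT_VLAN:
		return foo_port_vlan_add(dev, &obj->u.vlan);
	case SWITCHDEV_OBJ_PORT_FDB:
		return foo_port_fdb_add(dev, &obj->u.fdb);
	case SWITCHDEV_OBJ_IPV4_FIB:
		return foo_port_fib4_add(dev, &obj->u.ipv4_fib);
	default:
		return -EOPNOTSUPP;
	}
}
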
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index ba7daa864d44..48fd3b5a73fb 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -38,13 +38,6 @@
38#include "addr.h" 38#include "addr.h"
39#include "core.h" 39#include "core.h"
40 40
41u32 tipc_own_addr(struct net *net)
42{
43 struct tipc_net *tn = net_generic(net, tipc_net_id);
44
45 return tn->own_addr;
46}
47
48/** 41/**
49 * in_own_cluster - test for cluster inclusion; <0.0.0> always matches 42 * in_own_cluster - test for cluster inclusion; <0.0.0> always matches
50 */ 43 */
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 7ba6d5c8ae40..93f7c983be33 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -41,10 +41,18 @@
41#include <linux/tipc.h> 41#include <linux/tipc.h>
42#include <net/net_namespace.h> 42#include <net/net_namespace.h>
43#include <net/netns/generic.h> 43#include <net/netns/generic.h>
44#include "core.h"
44 45
45#define TIPC_ZONE_MASK 0xff000000u 46#define TIPC_ZONE_MASK 0xff000000u
46#define TIPC_CLUSTER_MASK 0xfffff000u 47#define TIPC_CLUSTER_MASK 0xfffff000u
47 48
49static inline u32 tipc_own_addr(struct net *net)
50{
51 struct tipc_net *tn = net_generic(net, tipc_net_id);
52
53 return tn->own_addr;
54}
55
48static inline u32 tipc_zone_mask(u32 addr) 56static inline u32 tipc_zone_mask(u32 addr)
49{ 57{
50 return addr & TIPC_ZONE_MASK; 58 return addr & TIPC_ZONE_MASK;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index c5cbdcb1f0b5..a816382fc8af 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -108,6 +108,11 @@ void tipc_bclink_remove_node(struct net *net, u32 addr)
108 108
109 tipc_bclink_lock(net); 109 tipc_bclink_lock(net);
110 tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); 110 tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
111
112 /* Last node? => reset backlog queue */
113 if (!tn->bclink->bcast_nodes.count)
114 tipc_link_purge_backlog(&tn->bclink->link);
115
111 tipc_bclink_unlock(net); 116 tipc_bclink_unlock(net);
112} 117}
113 118
@@ -115,19 +120,15 @@ static void bclink_set_last_sent(struct net *net)
115{ 120{
116 struct tipc_net *tn = net_generic(net, tipc_net_id); 121 struct tipc_net *tn = net_generic(net, tipc_net_id);
117 struct tipc_link *bcl = tn->bcl; 122 struct tipc_link *bcl = tn->bcl;
118 struct sk_buff *skb = skb_peek(&bcl->backlogq);
119 123
120 if (skb) 124 bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1);
121 bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1);
122 else
123 bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
124} 125}
125 126
126u32 tipc_bclink_get_last_sent(struct net *net) 127u32 tipc_bclink_get_last_sent(struct net *net)
127{ 128{
128 struct tipc_net *tn = net_generic(net, tipc_net_id); 129 struct tipc_net *tn = net_generic(net, tipc_net_id);
129 130
130 return tn->bcl->fsm_msg_cnt; 131 return tn->bcl->silent_intv_cnt;
131} 132}
132 133
133static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) 134static void bclink_update_last_sent(struct tipc_node *node, u32 seqno)
@@ -212,16 +213,16 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
212 * or both sent and unsent messages (otherwise) 213 * or both sent and unsent messages (otherwise)
213 */ 214 */
214 if (tn->bclink->bcast_nodes.count) 215 if (tn->bclink->bcast_nodes.count)
215 acked = tn->bcl->fsm_msg_cnt; 216 acked = tn->bcl->silent_intv_cnt;
216 else 217 else
217 acked = tn->bcl->next_out_no; 218 acked = tn->bcl->snd_nxt;
218 } else { 219 } else {
219 /* 220 /*
220 * Bail out if specified sequence number does not correspond 221 * Bail out if specified sequence number does not correspond
221 * to a message that has been sent and not yet acknowledged 222 * to a message that has been sent and not yet acknowledged
222 */ 223 */
223 if (less(acked, buf_seqno(skb)) || 224 if (less(acked, buf_seqno(skb)) ||
224 less(tn->bcl->fsm_msg_cnt, acked) || 225 less(tn->bcl->silent_intv_cnt, acked) ||
225 less_eq(acked, n_ptr->bclink.acked)) 226 less_eq(acked, n_ptr->bclink.acked))
226 goto exit; 227 goto exit;
227 } 228 }
@@ -803,9 +804,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
803 goto attr_msg_full; 804 goto attr_msg_full;
804 if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) 805 if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
805 goto attr_msg_full; 806 goto attr_msg_full;
806 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->next_in_no)) 807 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt))
807 goto attr_msg_full; 808 goto attr_msg_full;
808 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->next_out_no)) 809 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt))
809 goto attr_msg_full; 810 goto attr_msg_full;
810 811
811 prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); 812 prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
@@ -866,6 +867,27 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit)
866 return 0; 867 return 0;
867} 868}
868 869
870int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
871{
872 int err;
873 u32 win;
874 struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
875
876 if (!attrs[TIPC_NLA_LINK_PROP])
877 return -EINVAL;
878
879 err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props);
880 if (err)
881 return err;
882
883 if (!props[TIPC_NLA_PROP_WIN])
884 return -EOPNOTSUPP;
885
886 win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
887
888 return tipc_bclink_set_queue_limits(net, win);
889}
890
869int tipc_bclink_init(struct net *net) 891int tipc_bclink_init(struct net *net)
870{ 892{
871 struct tipc_net *tn = net_generic(net, tipc_net_id); 893 struct tipc_net *tn = net_generic(net, tipc_net_id);
@@ -893,7 +915,7 @@ int tipc_bclink_init(struct net *net)
893 __skb_queue_head_init(&bcl->backlogq); 915 __skb_queue_head_init(&bcl->backlogq);
894 __skb_queue_head_init(&bcl->deferdq); 916 __skb_queue_head_init(&bcl->deferdq);
895 skb_queue_head_init(&bcl->wakeupq); 917 skb_queue_head_init(&bcl->wakeupq);
896 bcl->next_out_no = 1; 918 bcl->snd_nxt = 1;
897 spin_lock_init(&bclink->node.lock); 919 spin_lock_init(&bclink->node.lock);
898 __skb_queue_head_init(&bclink->arrvq); 920 __skb_queue_head_init(&bclink->arrvq);
899 skb_queue_head_init(&bclink->inputq); 921 skb_queue_head_init(&bclink->inputq);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4bdc12277d33..3c290a48f720 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -131,6 +131,7 @@ uint tipc_bclink_get_mtu(void);
131int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); 131int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list);
132void tipc_bclink_wakeup_users(struct net *net); 132void tipc_bclink_wakeup_users(struct net *net);
133int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); 133int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
134int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
134void tipc_bclink_input(struct net *net); 135void tipc_bclink_input(struct net *net);
135 136
136#endif 137#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 70e3dacbf84a..00bc0e620532 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -71,8 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
71 [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } 71 [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
72}; 72};
73 73
74static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, 74static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr);
75 bool shutting_down);
76 75
77/** 76/**
78 * tipc_media_find - locates specified media object by name 77 * tipc_media_find - locates specified media object by name
@@ -324,7 +323,7 @@ restart:
324 323
325 res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); 324 res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr);
326 if (res) { 325 if (res) {
327 bearer_disable(net, b_ptr, false); 326 bearer_disable(net, b_ptr);
328 pr_warn("Bearer <%s> rejected, discovery object creation failed\n", 327 pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
329 name); 328 name);
330 return -EINVAL; 329 return -EINVAL;
@@ -344,7 +343,7 @@ restart:
344static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) 343static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
345{ 344{
346 pr_info("Resetting bearer <%s>\n", b_ptr->name); 345 pr_info("Resetting bearer <%s>\n", b_ptr->name);
347 tipc_link_reset_list(net, b_ptr->identity); 346 tipc_link_delete_list(net, b_ptr->identity);
348 tipc_disc_reset(net, b_ptr); 347 tipc_disc_reset(net, b_ptr);
349 return 0; 348 return 0;
350} 349}
@@ -354,8 +353,7 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
354 * 353 *
355 * Note: This routine assumes caller holds RTNL lock. 354 * Note: This routine assumes caller holds RTNL lock.
356 */ 355 */
357static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, 356static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
358 bool shutting_down)
359{ 357{
360 struct tipc_net *tn = net_generic(net, tipc_net_id); 358 struct tipc_net *tn = net_generic(net, tipc_net_id);
361 u32 i; 359 u32 i;
@@ -363,7 +361,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
363 pr_info("Disabling bearer <%s>\n", b_ptr->name); 361 pr_info("Disabling bearer <%s>\n", b_ptr->name);
364 b_ptr->media->disable_media(b_ptr); 362 b_ptr->media->disable_media(b_ptr);
365 363
366 tipc_link_delete_list(net, b_ptr->identity, shutting_down); 364 tipc_link_delete_list(net, b_ptr->identity);
367 if (b_ptr->link_req) 365 if (b_ptr->link_req)
368 tipc_disc_delete(b_ptr->link_req); 366 tipc_disc_delete(b_ptr->link_req);
369 367
@@ -541,7 +539,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
541 break; 539 break;
542 case NETDEV_UNREGISTER: 540 case NETDEV_UNREGISTER:
543 case NETDEV_CHANGENAME: 541 case NETDEV_CHANGENAME:
544 bearer_disable(dev_net(dev), b_ptr, false); 542 bearer_disable(dev_net(dev), b_ptr);
545 break; 543 break;
546 } 544 }
547 return NOTIFY_OK; 545 return NOTIFY_OK;
@@ -583,7 +581,7 @@ void tipc_bearer_stop(struct net *net)
583 for (i = 0; i < MAX_BEARERS; i++) { 581 for (i = 0; i < MAX_BEARERS; i++) {
584 b_ptr = rtnl_dereference(tn->bearer_list[i]); 582 b_ptr = rtnl_dereference(tn->bearer_list[i]);
585 if (b_ptr) { 583 if (b_ptr) {
586 bearer_disable(net, b_ptr, true); 584 bearer_disable(net, b_ptr);
587 tn->bearer_list[i] = NULL; 585 tn->bearer_list[i] = NULL;
588 } 586 }
589 } 587 }
@@ -747,7 +745,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
747 return -EINVAL; 745 return -EINVAL;
748 } 746 }
749 747
750 bearer_disable(net, bearer, false); 748 bearer_disable(net, bearer);
751 rtnl_unlock(); 749 rtnl_unlock();
752 750
753 return 0; 751 return 0;
@@ -812,7 +810,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
812 char *name; 810 char *name;
813 struct tipc_bearer *b; 811 struct tipc_bearer *b;
814 struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; 812 struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
815 struct net *net = genl_info_net(info); 813 struct net *net = sock_net(skb->sk);
816 814
817 if (!info->attrs[TIPC_NLA_BEARER]) 815 if (!info->attrs[TIPC_NLA_BEARER])
818 return -EINVAL; 816 return -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 5cad243ee8fc..dc714d977768 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -38,9 +38,9 @@
38#define _TIPC_BEARER_H 38#define _TIPC_BEARER_H
39 39
40#include "netlink.h" 40#include "netlink.h"
41#include "core.h"
41#include <net/genetlink.h> 42#include <net/genetlink.h>
42 43
43#define MAX_BEARERS 2
44#define MAX_MEDIA 3 44#define MAX_MEDIA 3
45#define MAX_NODES 4096 45#define MAX_NODES 4096
46#define WSIZE 32 46#define WSIZE 32
diff --git a/net/tipc/core.c b/net/tipc/core.c
index be1c9fa60b09..005ba5eb0ea4 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -68,7 +68,7 @@ static int __net_init tipc_init_net(struct net *net)
68 if (err) 68 if (err)
69 goto out_nametbl; 69 goto out_nametbl;
70 70
71 err = tipc_subscr_start(net); 71 err = tipc_topsrv_start(net);
72 if (err) 72 if (err)
73 goto out_subscr; 73 goto out_subscr;
74 return 0; 74 return 0;
@@ -83,7 +83,7 @@ out_sk_rht:
83 83
84static void __net_exit tipc_exit_net(struct net *net) 84static void __net_exit tipc_exit_net(struct net *net)
85{ 85{
86 tipc_subscr_stop(net); 86 tipc_topsrv_stop(net);
87 tipc_net_stop(net); 87 tipc_net_stop(net);
88 tipc_nametbl_stop(net); 88 tipc_nametbl_stop(net);
89 tipc_sk_rht_destroy(net); 89 tipc_sk_rht_destroy(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 3dc68c7a966d..0fcf133d5cb7 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,16 +60,19 @@
60#include <net/netns/generic.h> 60#include <net/netns/generic.h>
61#include <linux/rhashtable.h> 61#include <linux/rhashtable.h>
62 62
63#include "node.h" 63struct tipc_node;
64#include "bearer.h" 64struct tipc_bearer;
65#include "bcast.h" 65struct tipc_bcbearer;
66#include "netlink.h" 66struct tipc_bclink;
67#include "link.h" 67struct tipc_link;
68#include "node.h" 68struct tipc_name_table;
69#include "msg.h" 69struct tipc_server;
70 70
71#define TIPC_MOD_VER "2.0.0" 71#define TIPC_MOD_VER "2.0.0"
72 72
73#define NODE_HTABLE_SIZE 512
74#define MAX_BEARERS 3
75
73extern int tipc_net_id __read_mostly; 76extern int tipc_net_id __read_mostly;
74extern int sysctl_tipc_rmem[3] __read_mostly; 77extern int sysctl_tipc_rmem[3] __read_mostly;
75extern int sysctl_tipc_named_timeout __read_mostly; 78extern int sysctl_tipc_named_timeout __read_mostly;
@@ -106,6 +109,26 @@ struct tipc_net {
106 atomic_t subscription_count; 109 atomic_t subscription_count;
107}; 110};
108 111
112static inline u16 mod(u16 x)
113{
114 return x & 0xffffu;
115}
116
117static inline int less_eq(u16 left, u16 right)
118{
119 return mod(right - left) < 32768u;
120}
121
122static inline int more(u16 left, u16 right)
123{
124 return !less_eq(left, right);
125}
126
127static inline int less(u16 left, u16 right)
128{
129 return less_eq(left, right) && (mod(right) != mod(left));
130}
131
109#ifdef CONFIG_SYSCTL 132#ifdef CONFIG_SYSCTL
110int tipc_register_sysctl(void); 133int tipc_register_sysctl(void);
111void tipc_unregister_sysctl(void); 134void tipc_unregister_sysctl(void);
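
The u16 sequence helpers moved into core.h implement classic serial-number arithmetic: one number precedes another when fewer than half of the 16-bit space separates them, so comparisons stay correct across wraparound. A standalone userspace check of that property (same logic, plain C):

#include <assert.h>
#include <stdint.h>

static uint16_t mod(uint16_t x)            { return x & 0xffffu; }
static int less_eq(uint16_t l, uint16_t r) { return mod(r - l) < 32768u; }
static int less(uint16_t l, uint16_t r)    { return less_eq(l, r) && mod(r) != mod(l); }

int main(void)
{
	assert(less(1, 2));          /* ordinary ordering */
	assert(less(65535, 0));      /* still ordered across wraparound */
	assert(!less_eq(0, 40000));  /* more than half the space apart: order flips */
	return 0;
}
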
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 43a515dc97b0..eaa9fe54b4ae 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -86,7 +86,7 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
86 */ 86 */
87#define STARTING_EVT 856384768 /* link processing trigger */ 87#define STARTING_EVT 856384768 /* link processing trigger */
88#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */ 88#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */
89#define TIMEOUT_EVT 560817u /* link timer expired */ 89#define SILENCE_EVT 560817u /* timer discovered silence from peer */
90 90
91/* 91/*
92 * State value stored in 'failover_pkts' 92 * State value stored in 'failover_pkts'
@@ -106,6 +106,7 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
106static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); 106static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb);
107static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); 107static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb);
108static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); 108static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb);
109static void link_set_timer(struct tipc_link *link, unsigned long time);
109/* 110/*
110 * Simple link routines 111 * Simple link routines
111 */ 112 */
@@ -197,11 +198,12 @@ static void link_timeout(unsigned long data)
197 } 198 }
198 199
199 /* do all other link processing performed on a periodic basis */ 200 /* do all other link processing performed on a periodic basis */
200 link_state_event(l_ptr, TIMEOUT_EVT); 201 if (l_ptr->silent_intv_cnt || tipc_bclink_acks_missing(l_ptr->owner))
201 202 link_state_event(l_ptr, SILENCE_EVT);
203 l_ptr->silent_intv_cnt++;
202 if (skb_queue_len(&l_ptr->backlogq)) 204 if (skb_queue_len(&l_ptr->backlogq))
203 tipc_link_push_packets(l_ptr); 205 tipc_link_push_packets(l_ptr);
204 206 link_set_timer(l_ptr, l_ptr->keepalive_intv);
205 tipc_node_unlock(l_ptr->owner); 207 tipc_node_unlock(l_ptr->owner);
206 tipc_link_put(l_ptr); 208 tipc_link_put(l_ptr);
207} 209}
@@ -233,8 +235,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
233 235
234 if (n_ptr->link_cnt >= MAX_BEARERS) { 236 if (n_ptr->link_cnt >= MAX_BEARERS) {
235 tipc_addr_string_fill(addr_string, n_ptr->addr); 237 tipc_addr_string_fill(addr_string, n_ptr->addr);
236 pr_err("Attempt to establish %uth link to %s. Max %u allowed.\n", 238 pr_err("Cannot establish %uth link to %s. Max %u allowed.\n",
237 n_ptr->link_cnt, addr_string, MAX_BEARERS); 239 n_ptr->link_cnt, addr_string, MAX_BEARERS);
238 return NULL; 240 return NULL;
239 } 241 }
240 242
@@ -261,7 +263,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
261 /* note: peer i/f name is updated by reset/activate message */ 263 /* note: peer i/f name is updated by reset/activate message */
262 memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); 264 memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
263 l_ptr->owner = n_ptr; 265 l_ptr->owner = n_ptr;
264 l_ptr->checkpoint = 1;
265 l_ptr->peer_session = INVALID_SESSION; 266 l_ptr->peer_session = INVALID_SESSION;
266 l_ptr->bearer_id = b_ptr->identity; 267 l_ptr->bearer_id = b_ptr->identity;
267 link_set_supervision_props(l_ptr, b_ptr->tolerance); 268 link_set_supervision_props(l_ptr, b_ptr->tolerance);
@@ -280,7 +281,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
280 l_ptr->mtu = l_ptr->advertised_mtu; 281 l_ptr->mtu = l_ptr->advertised_mtu;
281 l_ptr->priority = b_ptr->priority; 282 l_ptr->priority = b_ptr->priority;
282 tipc_link_set_queue_limits(l_ptr, b_ptr->window); 283 tipc_link_set_queue_limits(l_ptr, b_ptr->window);
283 l_ptr->next_out_no = 1; 284 l_ptr->snd_nxt = 1;
284 __skb_queue_head_init(&l_ptr->transmq); 285 __skb_queue_head_init(&l_ptr->transmq);
285 __skb_queue_head_init(&l_ptr->backlogq); 286 __skb_queue_head_init(&l_ptr->backlogq);
286 __skb_queue_head_init(&l_ptr->deferdq); 287 __skb_queue_head_init(&l_ptr->deferdq);
@@ -311,8 +312,7 @@ void tipc_link_delete(struct tipc_link *l)
311 tipc_link_put(l); 312 tipc_link_put(l);
312} 313}
313 314
314void tipc_link_delete_list(struct net *net, unsigned int bearer_id, 315void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
315 bool shutting_down)
316{ 316{
317 struct tipc_net *tn = net_generic(net, tipc_net_id); 317 struct tipc_net *tn = net_generic(net, tipc_net_id);
318 struct tipc_link *link; 318 struct tipc_link *link;
@@ -404,7 +404,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr)
404 l_ptr->reasm_buf = NULL; 404 l_ptr->reasm_buf = NULL;
405} 405}
406 406
407static void tipc_link_purge_backlog(struct tipc_link *l) 407void tipc_link_purge_backlog(struct tipc_link *l)
408{ 408{
409 __skb_queue_purge(&l->backlogq); 409 __skb_queue_purge(&l->backlogq);
410 l->backlog[TIPC_LOW_IMPORTANCE].len = 0; 410 l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
@@ -451,9 +451,9 @@ void tipc_link_reset(struct tipc_link *l_ptr)
451 451
452 if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { 452 if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) {
453 l_ptr->flags |= LINK_FAILINGOVER; 453 l_ptr->flags |= LINK_FAILINGOVER;
454 l_ptr->failover_checkpt = l_ptr->next_in_no; 454 l_ptr->failover_checkpt = l_ptr->rcv_nxt;
455 pl->failover_pkts = FIRST_FAILOVER; 455 pl->failover_pkts = FIRST_FAILOVER;
456 pl->failover_checkpt = l_ptr->next_in_no; 456 pl->failover_checkpt = l_ptr->rcv_nxt;
457 pl->failover_skb = l_ptr->reasm_buf; 457 pl->failover_skb = l_ptr->reasm_buf;
458 } else { 458 } else {
459 kfree_skb(l_ptr->reasm_buf); 459 kfree_skb(l_ptr->reasm_buf);
@@ -469,36 +469,19 @@ void tipc_link_reset(struct tipc_link *l_ptr)
469 tipc_link_purge_backlog(l_ptr); 469 tipc_link_purge_backlog(l_ptr);
470 l_ptr->reasm_buf = NULL; 470 l_ptr->reasm_buf = NULL;
471 l_ptr->rcv_unacked = 0; 471 l_ptr->rcv_unacked = 0;
472 l_ptr->checkpoint = 1; 472 l_ptr->snd_nxt = 1;
473 l_ptr->next_out_no = 1; 473 l_ptr->silent_intv_cnt = 0;
474 l_ptr->fsm_msg_cnt = 0;
475 l_ptr->stale_count = 0; 474 l_ptr->stale_count = 0;
476 link_reset_statistics(l_ptr); 475 link_reset_statistics(l_ptr);
477} 476}
478 477
479void tipc_link_reset_list(struct net *net, unsigned int bearer_id)
480{
481 struct tipc_net *tn = net_generic(net, tipc_net_id);
482 struct tipc_link *l_ptr;
483 struct tipc_node *n_ptr;
484
485 rcu_read_lock();
486 list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
487 tipc_node_lock(n_ptr);
488 l_ptr = n_ptr->links[bearer_id];
489 if (l_ptr)
490 tipc_link_reset(l_ptr);
491 tipc_node_unlock(n_ptr);
492 }
493 rcu_read_unlock();
494}
495
496static void link_activate(struct tipc_link *link) 478static void link_activate(struct tipc_link *link)
497{ 479{
498 struct tipc_node *node = link->owner; 480 struct tipc_node *node = link->owner;
499 481
500 link->next_in_no = 1; 482 link->rcv_nxt = 1;
501 link->stats.recv_info = 1; 483 link->stats.recv_info = 1;
484 link->silent_intv_cnt = 0;
502 tipc_node_link_up(node, link); 485 tipc_node_link_up(node, link);
503 tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); 486 tipc_bearer_add_dest(node->net, link->bearer_id, link->addr);
504} 487}
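The supervision model changes here: instead of comparing rcv_nxt against a saved checkpoint on every timeout, the link now zeroes silent_intv_cnt on each reception and counts timer intervals in which nothing arrived. A minimal user-space sketch of the counting scheme (names are illustrative stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdint.h>

struct link_sup {
	uint32_t silent_intv_cnt;	/* intervals with nothing received */
	uint32_t abort_limit;		/* silent intervals tolerated */
};

/* Any reception from the peer proves liveness. */
void link_on_rcv(struct link_sup *l)
{
	l->silent_intv_cnt = 0;
}

/* Called once per keepalive interval; true means "declare link failed". */
bool link_on_interval(struct link_sup *l)
{
	return ++l->silent_intv_cnt > l->abort_limit;
}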
@@ -511,7 +494,7 @@ static void link_activate(struct tipc_link *link)
511static void link_state_event(struct tipc_link *l_ptr, unsigned int event) 494static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
512{ 495{
513 struct tipc_link *other; 496 struct tipc_link *other;
514 unsigned long cont_intv = l_ptr->cont_intv; 497 unsigned long timer_intv = l_ptr->keepalive_intv;
515 498
516 if (l_ptr->flags & LINK_STOPPED) 499 if (l_ptr->flags & LINK_STOPPED)
517 return; 500 return;
@@ -519,45 +502,33 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
519 if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) 502 if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT))
520 return; /* Not yet. */ 503 return; /* Not yet. */
521 504
522 if (l_ptr->flags & LINK_FAILINGOVER) { 505 if (l_ptr->flags & LINK_FAILINGOVER)
523 if (event == TIMEOUT_EVT)
524 link_set_timer(l_ptr, cont_intv);
525 return; 506 return;
526 }
527 507
528 switch (l_ptr->state) { 508 switch (l_ptr->state) {
529 case WORKING_WORKING: 509 case WORKING_WORKING:
530 switch (event) { 510 switch (event) {
531 case TRAFFIC_MSG_EVT: 511 case TRAFFIC_MSG_EVT:
532 case ACTIVATE_MSG: 512 case ACTIVATE_MSG:
513 l_ptr->silent_intv_cnt = 0;
533 break; 514 break;
534 case TIMEOUT_EVT: 515 case SILENCE_EVT:
535 if (l_ptr->next_in_no != l_ptr->checkpoint) { 516 if (!l_ptr->silent_intv_cnt) {
536 l_ptr->checkpoint = l_ptr->next_in_no; 517 if (tipc_bclink_acks_missing(l_ptr->owner))
537 if (tipc_bclink_acks_missing(l_ptr->owner)) {
538 tipc_link_proto_xmit(l_ptr, STATE_MSG, 518 tipc_link_proto_xmit(l_ptr, STATE_MSG,
539 0, 0, 0, 0); 519 0, 0, 0, 0);
540 l_ptr->fsm_msg_cnt++;
541 }
542 link_set_timer(l_ptr, cont_intv);
543 break; 520 break;
544 } 521 }
545 l_ptr->state = WORKING_UNKNOWN; 522 l_ptr->state = WORKING_UNKNOWN;
546 l_ptr->fsm_msg_cnt = 0;
547 tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); 523 tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
548 l_ptr->fsm_msg_cnt++;
549 link_set_timer(l_ptr, cont_intv / 4);
550 break; 524 break;
551 case RESET_MSG: 525 case RESET_MSG:
552 pr_debug("%s<%s>, requested by peer\n", 526 pr_debug("%s<%s>, requested by peer\n",
553 link_rst_msg, l_ptr->name); 527 link_rst_msg, l_ptr->name);
554 tipc_link_reset(l_ptr); 528 tipc_link_reset(l_ptr);
555 l_ptr->state = RESET_RESET; 529 l_ptr->state = RESET_RESET;
556 l_ptr->fsm_msg_cnt = 0;
557 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 530 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
558 0, 0, 0, 0); 531 0, 0, 0, 0);
559 l_ptr->fsm_msg_cnt++;
560 link_set_timer(l_ptr, cont_intv);
561 break; 532 break;
562 default: 533 default:
563 pr_debug("%s%u in WW state\n", link_unk_evt, event); 534 pr_debug("%s%u in WW state\n", link_unk_evt, event);
@@ -568,46 +539,33 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
568 case TRAFFIC_MSG_EVT: 539 case TRAFFIC_MSG_EVT:
569 case ACTIVATE_MSG: 540 case ACTIVATE_MSG:
570 l_ptr->state = WORKING_WORKING; 541 l_ptr->state = WORKING_WORKING;
571 l_ptr->fsm_msg_cnt = 0; 542 l_ptr->silent_intv_cnt = 0;
572 link_set_timer(l_ptr, cont_intv);
573 break; 543 break;
574 case RESET_MSG: 544 case RESET_MSG:
575 pr_debug("%s<%s>, requested by peer while probing\n", 545 pr_debug("%s<%s>, requested by peer while probing\n",
576 link_rst_msg, l_ptr->name); 546 link_rst_msg, l_ptr->name);
577 tipc_link_reset(l_ptr); 547 tipc_link_reset(l_ptr);
578 l_ptr->state = RESET_RESET; 548 l_ptr->state = RESET_RESET;
579 l_ptr->fsm_msg_cnt = 0;
580 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 549 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
581 0, 0, 0, 0); 550 0, 0, 0, 0);
582 l_ptr->fsm_msg_cnt++;
583 link_set_timer(l_ptr, cont_intv);
584 break; 551 break;
585 case TIMEOUT_EVT: 552 case SILENCE_EVT:
586 if (l_ptr->next_in_no != l_ptr->checkpoint) { 553 if (!l_ptr->silent_intv_cnt) {
587 l_ptr->state = WORKING_WORKING; 554 l_ptr->state = WORKING_WORKING;
588 l_ptr->fsm_msg_cnt = 0; 555 if (tipc_bclink_acks_missing(l_ptr->owner))
589 l_ptr->checkpoint = l_ptr->next_in_no;
590 if (tipc_bclink_acks_missing(l_ptr->owner)) {
591 tipc_link_proto_xmit(l_ptr, STATE_MSG, 556 tipc_link_proto_xmit(l_ptr, STATE_MSG,
592 0, 0, 0, 0); 557 0, 0, 0, 0);
593 l_ptr->fsm_msg_cnt++; 558 } else if (l_ptr->silent_intv_cnt <
594 } 559 l_ptr->abort_limit) {
595 link_set_timer(l_ptr, cont_intv);
596 } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
597 tipc_link_proto_xmit(l_ptr, STATE_MSG, 560 tipc_link_proto_xmit(l_ptr, STATE_MSG,
598 1, 0, 0, 0); 561 1, 0, 0, 0);
599 l_ptr->fsm_msg_cnt++;
600 link_set_timer(l_ptr, cont_intv / 4);
601 } else { /* Link has failed */ 562 } else { /* Link has failed */
602 pr_debug("%s<%s>, peer not responding\n", 563 pr_debug("%s<%s>, peer not responding\n",
603 link_rst_msg, l_ptr->name); 564 link_rst_msg, l_ptr->name);
604 tipc_link_reset(l_ptr); 565 tipc_link_reset(l_ptr);
605 l_ptr->state = RESET_UNKNOWN; 566 l_ptr->state = RESET_UNKNOWN;
606 l_ptr->fsm_msg_cnt = 0;
607 tipc_link_proto_xmit(l_ptr, RESET_MSG, 567 tipc_link_proto_xmit(l_ptr, RESET_MSG,
608 0, 0, 0, 0); 568 0, 0, 0, 0);
609 l_ptr->fsm_msg_cnt++;
610 link_set_timer(l_ptr, cont_intv);
611 } 569 }
612 break; 570 break;
613 default: 571 default:
@@ -623,31 +581,22 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
623 if (other && link_working_unknown(other)) 581 if (other && link_working_unknown(other))
624 break; 582 break;
625 l_ptr->state = WORKING_WORKING; 583 l_ptr->state = WORKING_WORKING;
626 l_ptr->fsm_msg_cnt = 0;
627 link_activate(l_ptr); 584 link_activate(l_ptr);
628 tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); 585 tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
629 l_ptr->fsm_msg_cnt++;
630 if (l_ptr->owner->working_links == 1) 586 if (l_ptr->owner->working_links == 1)
631 tipc_link_sync_xmit(l_ptr); 587 tipc_link_sync_xmit(l_ptr);
632 link_set_timer(l_ptr, cont_intv);
633 break; 588 break;
634 case RESET_MSG: 589 case RESET_MSG:
635 l_ptr->state = RESET_RESET; 590 l_ptr->state = RESET_RESET;
636 l_ptr->fsm_msg_cnt = 0;
637 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 591 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
638 1, 0, 0, 0); 592 1, 0, 0, 0);
639 l_ptr->fsm_msg_cnt++;
640 link_set_timer(l_ptr, cont_intv);
641 break; 593 break;
642 case STARTING_EVT: 594 case STARTING_EVT:
643 l_ptr->flags |= LINK_STARTED; 595 l_ptr->flags |= LINK_STARTED;
644 l_ptr->fsm_msg_cnt++; 596 link_set_timer(l_ptr, timer_intv);
645 link_set_timer(l_ptr, cont_intv);
646 break; 597 break;
647 case TIMEOUT_EVT: 598 case SILENCE_EVT:
648 tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); 599 tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0);
649 l_ptr->fsm_msg_cnt++;
650 link_set_timer(l_ptr, cont_intv);
651 break; 600 break;
652 default: 601 default:
653 pr_err("%s%u in RU state\n", link_unk_evt, event); 602 pr_err("%s%u in RU state\n", link_unk_evt, event);
@@ -661,21 +610,16 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
661 if (other && link_working_unknown(other)) 610 if (other && link_working_unknown(other))
662 break; 611 break;
663 l_ptr->state = WORKING_WORKING; 612 l_ptr->state = WORKING_WORKING;
664 l_ptr->fsm_msg_cnt = 0;
665 link_activate(l_ptr); 613 link_activate(l_ptr);
666 tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); 614 tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
667 l_ptr->fsm_msg_cnt++;
668 if (l_ptr->owner->working_links == 1) 615 if (l_ptr->owner->working_links == 1)
669 tipc_link_sync_xmit(l_ptr); 616 tipc_link_sync_xmit(l_ptr);
670 link_set_timer(l_ptr, cont_intv);
671 break; 617 break;
672 case RESET_MSG: 618 case RESET_MSG:
673 break; 619 break;
674 case TIMEOUT_EVT: 620 case SILENCE_EVT:
675 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 621 tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
676 0, 0, 0, 0); 622 0, 0, 0, 0);
677 l_ptr->fsm_msg_cnt++;
678 link_set_timer(l_ptr, cont_intv);
679 break; 623 break;
680 default: 624 default:
681 pr_err("%s%u in RR state\n", link_unk_evt, event); 625 pr_err("%s%u in RR state\n", link_unk_evt, event);
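With the checkpoint gone, the per-state link_set_timer() calls and the fsm_msg_cnt bookkeeping can all be dropped: one periodic timer fires at keepalive_intv and feeds SILENCE_EVT into the FSM, which decides based on silent_intv_cnt alone. A compressed sketch of the working-state silence handling, using stand-in types; the counter increment is assumed to live in the timer callback, which is outside this hunk:

#include <stdint.h>

enum state { WORKING_WORKING, WORKING_UNKNOWN, RESET_UNKNOWN };
enum event { TRAFFIC_MSG_EVT, SILENCE_EVT };

struct link_fsm {
	enum state state;
	uint32_t silent_intv_cnt;
	uint32_t abort_limit;
};

void link_state_event(struct link_fsm *l, enum event evt)
{
	if (evt == TRAFFIC_MSG_EVT) {		/* any reception */
		l->silent_intv_cnt = 0;
		l->state = WORKING_WORKING;
		return;
	}
	/* SILENCE_EVT, issued once per keepalive interval */
	if (!l->silent_intv_cnt) {		/* peer was heard in time */
		l->state = WORKING_WORKING;
		return;
	}
	if (l->state == WORKING_WORKING)
		l->state = WORKING_UNKNOWN;	/* start probing */
	else if (l->silent_intv_cnt >= l->abort_limit)
		l->state = RESET_UNKNOWN;	/* peer not responding */
}

/* One periodic timer drives the FSM; no per-state re-arming. */
void link_timeout(struct link_fsm *l)
{
	link_state_event(l, SILENCE_EVT);
	l->silent_intv_cnt++;
}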
@@ -701,53 +645,58 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
701{ 645{
702 struct tipc_msg *msg = buf_msg(skb_peek(list)); 646 struct tipc_msg *msg = buf_msg(skb_peek(list));
703 unsigned int maxwin = link->window; 647 unsigned int maxwin = link->window;
704 unsigned int imp = msg_importance(msg); 648 unsigned int i, imp = msg_importance(msg);
705 uint mtu = link->mtu; 649 uint mtu = link->mtu;
706 uint ack = mod(link->next_in_no - 1); 650 u16 ack = mod(link->rcv_nxt - 1);
707 uint seqno = link->next_out_no; 651 u16 seqno = link->snd_nxt;
708 uint bc_last_in = link->owner->bclink.last_in; 652 u16 bc_last_in = link->owner->bclink.last_in;
709 struct tipc_media_addr *addr = &link->media_addr; 653 struct tipc_media_addr *addr = &link->media_addr;
710 struct sk_buff_head *transmq = &link->transmq; 654 struct sk_buff_head *transmq = &link->transmq;
711 struct sk_buff_head *backlogq = &link->backlogq; 655 struct sk_buff_head *backlogq = &link->backlogq;
712 struct sk_buff *skb, *tmp; 656 struct sk_buff *skb, *bskb;
713
714 /* Match backlog limit against msg importance: */
715 if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit))
716 return link_schedule_user(link, list);
717 657
658 /* Match msg importance against this and all higher backlog limits: */
659 for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
660 if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
661 return link_schedule_user(link, list);
662 }
718 if (unlikely(msg_size(msg) > mtu)) { 663 if (unlikely(msg_size(msg) > mtu)) {
719 __skb_queue_purge(list); 664 __skb_queue_purge(list);
720 return -EMSGSIZE; 665 return -EMSGSIZE;
721 } 666 }
722 /* Prepare each packet for sending, and add to relevant queue: */ 667 /* Prepare each packet for sending, and add to relevant queue: */
723 skb_queue_walk_safe(list, skb, tmp) { 668 while (skb_queue_len(list)) {
724 __skb_unlink(skb, list); 669 skb = skb_peek(list);
725 msg = buf_msg(skb); 670 msg = buf_msg(skb);
726 msg_set_seqno(msg, seqno); 671 msg_set_seqno(msg, seqno);
727 msg_set_ack(msg, ack); 672 msg_set_ack(msg, ack);
728 msg_set_bcast_ack(msg, bc_last_in); 673 msg_set_bcast_ack(msg, bc_last_in);
729 674
730 if (likely(skb_queue_len(transmq) < maxwin)) { 675 if (likely(skb_queue_len(transmq) < maxwin)) {
676 __skb_dequeue(list);
731 __skb_queue_tail(transmq, skb); 677 __skb_queue_tail(transmq, skb);
732 tipc_bearer_send(net, link->bearer_id, skb, addr); 678 tipc_bearer_send(net, link->bearer_id, skb, addr);
733 link->rcv_unacked = 0; 679 link->rcv_unacked = 0;
734 seqno++; 680 seqno++;
735 continue; 681 continue;
736 } 682 }
737 if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) { 683 if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) {
684 kfree_skb(__skb_dequeue(list));
738 link->stats.sent_bundled++; 685 link->stats.sent_bundled++;
739 continue; 686 continue;
740 } 687 }
741 if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { 688 if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) {
689 kfree_skb(__skb_dequeue(list));
690 __skb_queue_tail(backlogq, bskb);
691 link->backlog[msg_importance(buf_msg(bskb))].len++;
742 link->stats.sent_bundled++; 692 link->stats.sent_bundled++;
743 link->stats.sent_bundles++; 693 link->stats.sent_bundles++;
744 imp = msg_importance(buf_msg(skb)); 694 continue;
745 } 695 }
746 __skb_queue_tail(backlogq, skb); 696 link->backlog[imp].len += skb_queue_len(list);
747 link->backlog[imp].len++; 697 skb_queue_splice_tail_init(list, backlogq);
748 seqno++;
749 } 698 }
750 link->next_out_no = seqno; 699 link->snd_nxt = seqno;
751 return 0; 700 return 0;
752} 701}
753 702
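Note the new admission test in __tipc_link_xmit(): instead of checking only the message's own backlog level, it walks from that level up to TIPC_SYSTEM_IMPORTANCE, presumably because bundling can re-queue a message at a higher level (tipc_msg_make_bundle() below gives bundles CRITICAL or SYSTEM importance). The loop added above, in isolation:

#include <stdbool.h>

enum { TIPC_LOW_IMPORTANCE, TIPC_MEDIUM_IMPORTANCE, TIPC_HIGH_IMPORTANCE,
       TIPC_CRITICAL_IMPORTANCE, TIPC_SYSTEM_IMPORTANCE };

struct backlog { unsigned int len, limit; };

/* A message is admitted only if its own importance level and every
 * higher level are below their configured limits. */
bool link_may_queue(const struct backlog bl[TIPC_SYSTEM_IMPORTANCE + 1],
		    int imp)
{
	for (int i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++)
		if (bl[i].len >= bl[i].limit)
			return false;
	return true;
}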
@@ -877,7 +826,8 @@ void tipc_link_push_packets(struct tipc_link *link)
877{ 826{
878 struct sk_buff *skb; 827 struct sk_buff *skb;
879 struct tipc_msg *msg; 828 struct tipc_msg *msg;
880 unsigned int ack = mod(link->next_in_no - 1); 829 u16 seqno = link->snd_nxt;
830 u16 ack = mod(link->rcv_nxt - 1);
881 831
882 while (skb_queue_len(&link->transmq) < link->window) { 832 while (skb_queue_len(&link->transmq) < link->window) {
883 skb = __skb_dequeue(&link->backlogq); 833 skb = __skb_dequeue(&link->backlogq);
@@ -886,12 +836,15 @@ void tipc_link_push_packets(struct tipc_link *link)
886 msg = buf_msg(skb); 836 msg = buf_msg(skb);
887 link->backlog[msg_importance(msg)].len--; 837 link->backlog[msg_importance(msg)].len--;
888 msg_set_ack(msg, ack); 838 msg_set_ack(msg, ack);
839 msg_set_seqno(msg, seqno);
840 seqno = mod(seqno + 1);
889 msg_set_bcast_ack(msg, link->owner->bclink.last_in); 841 msg_set_bcast_ack(msg, link->owner->bclink.last_in);
890 link->rcv_unacked = 0; 842 link->rcv_unacked = 0;
891 __skb_queue_tail(&link->transmq, skb); 843 __skb_queue_tail(&link->transmq, skb);
892 tipc_bearer_send(link->owner->net, link->bearer_id, 844 tipc_bearer_send(link->owner->net, link->bearer_id,
893 skb, &link->media_addr); 845 skb, &link->media_addr);
894 } 846 }
847 link->snd_nxt = seqno;
895} 848}
896 849
897void tipc_link_reset_all(struct tipc_node *node) 850void tipc_link_reset_all(struct tipc_node *node)
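The ordering change in tipc_link_push_packets() matters: packets sitting in the backlog no longer carry a sequence number, and snd_nxt is applied only when a packet crosses into the transmit queue. That is what lets the failover and tunnel paths further down renumber the whole backlog. A sketch of the boundary, with the transmq splice elided:

#include <stddef.h>
#include <stdint.h>

struct pkt { uint16_t seqno; };

/* Move packets from the backlog into the in-flight window, stamping
 * the 16-bit seqno only at this point (wrap-around is free on u16). */
size_t push_packets(struct pkt *backlog, size_t nbacklog,
		    size_t in_flight, size_t window, uint16_t *snd_nxt)
{
	size_t moved = 0;

	while (in_flight + moved < window && moved < nbacklog) {
		backlog[moved].seqno = (*snd_nxt)++;
		moved++;		/* would splice onto transmq here */
	}
	return moved;
}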
@@ -964,13 +917,13 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
964 msg = buf_msg(skb); 917 msg = buf_msg(skb);
965 918
966 /* Detect repeated retransmit failures */ 919 /* Detect repeated retransmit failures */
967 if (l_ptr->last_retransmitted == msg_seqno(msg)) { 920 if (l_ptr->last_retransm == msg_seqno(msg)) {
968 if (++l_ptr->stale_count > 100) { 921 if (++l_ptr->stale_count > 100) {
969 link_retransmit_failure(l_ptr, skb); 922 link_retransmit_failure(l_ptr, skb);
970 return; 923 return;
971 } 924 }
972 } else { 925 } else {
973 l_ptr->last_retransmitted = msg_seqno(msg); 926 l_ptr->last_retransm = msg_seqno(msg);
974 l_ptr->stale_count = 1; 927 l_ptr->stale_count = 1;
975 } 928 }
976 929
@@ -978,7 +931,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
978 if (!retransmits) 931 if (!retransmits)
979 break; 932 break;
980 msg = buf_msg(skb); 933 msg = buf_msg(skb);
981 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); 934 msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
982 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 935 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
983 tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, 936 tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb,
984 &l_ptr->media_addr); 937 &l_ptr->media_addr);
@@ -1001,11 +954,11 @@ static bool link_synch(struct tipc_link *l)
1001 goto synched; 954 goto synched;
1002 955
1003 /* Was last pre-synch packet added to input queue ? */ 956 /* Was last pre-synch packet added to input queue ? */
1004 if (less_eq(pl->next_in_no, l->synch_point)) 957 if (less_eq(pl->rcv_nxt, l->synch_point))
1005 return false; 958 return false;
1006 959
1007 /* Is it still in the input queue ? */ 960 /* Is it still in the input queue ? */
1008 post_synch = mod(pl->next_in_no - l->synch_point) - 1; 961 post_synch = mod(pl->rcv_nxt - l->synch_point) - 1;
1009 if (skb_queue_len(&pl->inputq) > post_synch) 962 if (skb_queue_len(&pl->inputq) > post_synch)
1010 return false; 963 return false;
1011synched: 964synched:
@@ -1016,13 +969,13 @@ synched:
1016static void link_retrieve_defq(struct tipc_link *link, 969static void link_retrieve_defq(struct tipc_link *link,
1017 struct sk_buff_head *list) 970 struct sk_buff_head *list)
1018{ 971{
1019 u32 seq_no; 972 u16 seq_no;
1020 973
1021 if (skb_queue_empty(&link->deferdq)) 974 if (skb_queue_empty(&link->deferdq))
1022 return; 975 return;
1023 976
1024 seq_no = buf_seqno(skb_peek(&link->deferdq)); 977 seq_no = buf_seqno(skb_peek(&link->deferdq));
1025 if (seq_no == mod(link->next_in_no)) 978 if (seq_no == link->rcv_nxt)
1026 skb_queue_splice_tail_init(&link->deferdq, list); 979 skb_queue_splice_tail_init(&link->deferdq, list);
1027} 980}
1028 981
@@ -1043,8 +996,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
1043 struct tipc_link *l_ptr; 996 struct tipc_link *l_ptr;
1044 struct sk_buff *skb1, *tmp; 997 struct sk_buff *skb1, *tmp;
1045 struct tipc_msg *msg; 998 struct tipc_msg *msg;
1046 u32 seq_no; 999 u16 seq_no;
1047 u32 ackd; 1000 u16 ackd;
1048 u32 released; 1001 u32 released;
1049 1002
1050 skb2list(skb, &head); 1003 skb2list(skb, &head);
@@ -1137,18 +1090,20 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
1137 } 1090 }
1138 1091
1139 /* Link is now in state WORKING_WORKING */ 1092 /* Link is now in state WORKING_WORKING */
1140 if (unlikely(seq_no != mod(l_ptr->next_in_no))) { 1093 if (unlikely(seq_no != l_ptr->rcv_nxt)) {
1141 link_handle_out_of_seq_msg(l_ptr, skb); 1094 link_handle_out_of_seq_msg(l_ptr, skb);
1142 link_retrieve_defq(l_ptr, &head); 1095 link_retrieve_defq(l_ptr, &head);
1143 skb = NULL; 1096 skb = NULL;
1144 goto unlock; 1097 goto unlock;
1145 } 1098 }
1099 l_ptr->silent_intv_cnt = 0;
1100
1146 /* Synchronize with parallel link if applicable */ 1101 /* Synchronize with parallel link if applicable */
1147 if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { 1102 if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) {
1148 if (!link_synch(l_ptr)) 1103 if (!link_synch(l_ptr))
1149 goto unlock; 1104 goto unlock;
1150 } 1105 }
1151 l_ptr->next_in_no++; 1106 l_ptr->rcv_nxt++;
1152 if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) 1107 if (unlikely(!skb_queue_empty(&l_ptr->deferdq)))
1153 link_retrieve_defq(l_ptr, &head); 1108 link_retrieve_defq(l_ptr, &head);
1154 if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { 1109 if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
@@ -1268,7 +1223,7 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb)
1268u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) 1223u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
1269{ 1224{
1270 struct sk_buff *skb1; 1225 struct sk_buff *skb1;
1271 u32 seq_no = buf_seqno(skb); 1226 u16 seq_no = buf_seqno(skb);
1272 1227
1273 /* Empty queue ? */ 1228 /* Empty queue ? */
1274 if (skb_queue_empty(list)) { 1229 if (skb_queue_empty(list)) {
@@ -1284,7 +1239,7 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
1284 1239
1285 /* Locate insertion point in queue, then insert; discard if duplicate */ 1240 /* Locate insertion point in queue, then insert; discard if duplicate */
1286 skb_queue_walk(list, skb1) { 1241 skb_queue_walk(list, skb1) {
1287 u32 curr_seqno = buf_seqno(skb1); 1242 u16 curr_seqno = buf_seqno(skb1);
1288 1243
1289 if (seq_no == curr_seqno) { 1244 if (seq_no == curr_seqno) {
1290 kfree_skb(skb); 1245 kfree_skb(skb);
@@ -1312,14 +1267,14 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
1312 return; 1267 return;
1313 } 1268 }
1314 1269
1315 /* Record OOS packet arrival (force mismatch on next timeout) */ 1270 /* Record OOS packet arrival */
1316 l_ptr->checkpoint--; 1271 l_ptr->silent_intv_cnt = 0;
1317 1272
1318 /* 1273 /*
1319 * Discard packet if a duplicate; otherwise add it to deferred queue 1274 * Discard packet if a duplicate; otherwise add it to deferred queue
1320 * and notify peer of gap as per protocol specification 1275 * and notify peer of gap as per protocol specification
1321 */ 1276 */
1322 if (less(seq_no, mod(l_ptr->next_in_no))) { 1277 if (less(seq_no, l_ptr->rcv_nxt)) {
1323 l_ptr->stats.duplicates++; 1278 l_ptr->stats.duplicates++;
1324 kfree_skb(buf); 1279 kfree_skb(buf);
1325 return; 1280 return;
@@ -1344,6 +1299,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
1344 struct tipc_msg *msg = l_ptr->pmsg; 1299 struct tipc_msg *msg = l_ptr->pmsg;
1345 u32 msg_size = sizeof(l_ptr->proto_msg); 1300 u32 msg_size = sizeof(l_ptr->proto_msg);
1346 int r_flag; 1301 int r_flag;
1302 u16 last_rcv;
1347 1303
1348 /* Don't send protocol message during link failover */ 1304 /* Don't send protocol message during link failover */
1349 if (l_ptr->flags & LINK_FAILINGOVER) 1305 if (l_ptr->flags & LINK_FAILINGOVER)
@@ -1360,16 +1316,14 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
1360 msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); 1316 msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net));
1361 1317
1362 if (msg_typ == STATE_MSG) { 1318 if (msg_typ == STATE_MSG) {
1363 u32 next_sent = mod(l_ptr->next_out_no); 1319 u16 next_sent = l_ptr->snd_nxt;
1364 1320
1365 if (!tipc_link_is_up(l_ptr)) 1321 if (!tipc_link_is_up(l_ptr))
1366 return; 1322 return;
1367 if (skb_queue_len(&l_ptr->backlogq))
1368 next_sent = buf_seqno(skb_peek(&l_ptr->backlogq));
1369 msg_set_next_sent(msg, next_sent); 1323 msg_set_next_sent(msg, next_sent);
1370 if (!skb_queue_empty(&l_ptr->deferdq)) { 1324 if (!skb_queue_empty(&l_ptr->deferdq)) {
1371 u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq)); 1325 last_rcv = buf_seqno(skb_peek(&l_ptr->deferdq));
1372 gap = mod(rec - mod(l_ptr->next_in_no)); 1326 gap = mod(last_rcv - l_ptr->rcv_nxt);
1373 } 1327 }
1374 msg_set_seq_gap(msg, gap); 1328 msg_set_seq_gap(msg, gap);
1375 if (gap) 1329 if (gap)
@@ -1377,7 +1331,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
1377 msg_set_link_tolerance(msg, tolerance); 1331 msg_set_link_tolerance(msg, tolerance);
1378 msg_set_linkprio(msg, priority); 1332 msg_set_linkprio(msg, priority);
1379 msg_set_max_pkt(msg, l_ptr->mtu); 1333 msg_set_max_pkt(msg, l_ptr->mtu);
1380 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); 1334 msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
1381 msg_set_probe(msg, probe_msg != 0); 1335 msg_set_probe(msg, probe_msg != 0);
1382 if (probe_msg) 1336 if (probe_msg)
1383 l_ptr->stats.sent_probes++; 1337 l_ptr->stats.sent_probes++;
@@ -1397,7 +1351,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
1397 msg_set_linkprio(msg, l_ptr->priority); 1351 msg_set_linkprio(msg, l_ptr->priority);
1398 msg_set_size(msg, msg_size); 1352 msg_set_size(msg, msg_size);
1399 1353
1400 msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2))); 1354 msg_set_seqno(msg, mod(l_ptr->snd_nxt + (0xffff / 2)));
1401 1355
1402 buf = tipc_buf_acquire(msg_size); 1356 buf = tipc_buf_acquire(msg_size);
1403 if (!buf) 1357 if (!buf)
@@ -1496,17 +1450,15 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
1496 } 1450 }
1497 1451
1498 /* Record reception; force mismatch at next timeout: */ 1452 /* Record reception: */
1499 l_ptr->checkpoint--; 1453 l_ptr->silent_intv_cnt = 0;
1500 1454
1501 link_state_event(l_ptr, TRAFFIC_MSG_EVT); 1455 link_state_event(l_ptr, TRAFFIC_MSG_EVT);
1502 l_ptr->stats.recv_states++; 1456 l_ptr->stats.recv_states++;
1503 if (link_reset_unknown(l_ptr)) 1457 if (link_reset_unknown(l_ptr))
1504 break; 1458 break;
1505 1459
1506 if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) { 1460 if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg)))
1507 rec_gap = mod(msg_next_sent(msg) - 1461 rec_gap = mod(msg_next_sent(msg) - l_ptr->rcv_nxt);
1508 mod(l_ptr->next_in_no));
1509 }
1510 1462
1511 if (msg_probe(msg)) 1463 if (msg_probe(msg))
1512 l_ptr->stats.recv_probes++; 1464 l_ptr->stats.recv_probes++;
@@ -1580,6 +1532,11 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
1580 1532
1581 tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, 1533 tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL,
1582 FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); 1534 FAILOVER_MSG, INT_H_SIZE, l_ptr->addr);
1535
1536 skb_queue_walk(&l_ptr->backlogq, skb) {
1537 msg_set_seqno(buf_msg(skb), l_ptr->snd_nxt);
1538 l_ptr->snd_nxt = mod(l_ptr->snd_nxt + 1);
1539 }
1583 skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); 1540 skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq);
1584 tipc_link_purge_backlog(l_ptr); 1541 tipc_link_purge_backlog(l_ptr);
1585 msgcount = skb_queue_len(&l_ptr->transmq); 1542 msgcount = skb_queue_len(&l_ptr->transmq);
@@ -1640,6 +1597,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *link,
1640 struct tipc_msg tnl_hdr; 1597 struct tipc_msg tnl_hdr;
1641 struct sk_buff_head *queue = &link->transmq; 1598 struct sk_buff_head *queue = &link->transmq;
1642 int mcnt; 1599 int mcnt;
1600 u16 seqno;
1643 1601
1644 tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, 1602 tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL,
1645 SYNCH_MSG, INT_H_SIZE, link->addr); 1603 SYNCH_MSG, INT_H_SIZE, link->addr);
@@ -1653,7 +1611,7 @@ tunnel_queue:
1653 struct tipc_msg *msg = buf_msg(skb); 1611 struct tipc_msg *msg = buf_msg(skb);
1654 u32 len = msg_size(msg); 1612 u32 len = msg_size(msg);
1655 1613
1656 msg_set_ack(msg, mod(link->next_in_no - 1)); 1614 msg_set_ack(msg, mod(link->rcv_nxt - 1));
1657 msg_set_bcast_ack(msg, link->owner->bclink.last_in); 1615 msg_set_bcast_ack(msg, link->owner->bclink.last_in);
1658 msg_set_size(&tnl_hdr, len + INT_H_SIZE); 1616 msg_set_size(&tnl_hdr, len + INT_H_SIZE);
1659 outskb = tipc_buf_acquire(len + INT_H_SIZE); 1617 outskb = tipc_buf_acquire(len + INT_H_SIZE);
@@ -1671,6 +1629,11 @@ tunnel_queue:
1671 } 1629 }
1672 if (queue == &link->backlogq) 1630 if (queue == &link->backlogq)
1673 return; 1631 return;
1632 seqno = link->snd_nxt;
1633 skb_queue_walk(&link->backlogq, skb) {
1634 msg_set_seqno(buf_msg(skb), seqno);
1635 seqno = mod(seqno + 1);
1636 }
1674 queue = &link->backlogq; 1637 queue = &link->backlogq;
1675 goto tunnel_queue; 1638 goto tunnel_queue;
1676} 1639}
@@ -1742,8 +1705,8 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol)
1742 return; 1705 return;
1743 1706
1744 l_ptr->tolerance = tol; 1707 l_ptr->tolerance = tol;
1745 l_ptr->cont_intv = msecs_to_jiffies(intv); 1708 l_ptr->keepalive_intv = msecs_to_jiffies(intv);
1746 l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); 1709 l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->keepalive_intv));
1747} 1710}
1748 1711
1749void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) 1712void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
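The abort limit also simplifies: it is now the number of whole keepalive intervals that fit inside the tolerance window, replacing the old quarter-interval computation. Assuming the interval is derived as tol / 4, as in earlier TIPC code (the derivation of intv is outside this hunk), the arithmetic works out as:

#include <stdint.h>

/* Assumption: keepalive interval = tolerance / 4; not visible here. */
uint32_t keepalive_ms(uint32_t tol_ms)
{
	return tol_ms / 4;
}

/* Silent intervals tolerated before reset: tol / interval, which is
 * 4 under the assumption above; the old formula yielded 16. */
uint32_t abort_limit(uint32_t tol_ms)
{
	uint32_t intv = keepalive_ms(tol_ms);

	return intv ? tol_ms / intv : 0;	/* guard tiny tolerances */
}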
@@ -1803,8 +1766,8 @@ static struct tipc_node *tipc_link_find_owner(struct net *net,
1803static void link_reset_statistics(struct tipc_link *l_ptr) 1766static void link_reset_statistics(struct tipc_link *l_ptr)
1804{ 1767{
1805 memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); 1768 memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
1806 l_ptr->stats.sent_info = l_ptr->next_out_no; 1769 l_ptr->stats.sent_info = l_ptr->snd_nxt;
1807 l_ptr->stats.recv_info = l_ptr->next_in_no; 1770 l_ptr->stats.recv_info = l_ptr->rcv_nxt;
1808} 1771}
1809 1772
1810static void link_print(struct tipc_link *l_ptr, const char *str) 1773static void link_print(struct tipc_link *l_ptr, const char *str)
@@ -1893,6 +1856,9 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
1893 1856
1894 name = nla_data(attrs[TIPC_NLA_LINK_NAME]); 1857 name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
1895 1858
1859 if (strcmp(name, tipc_bclink_name) == 0)
1860 return tipc_nl_bc_link_set(net, attrs);
1861
1896 node = tipc_link_find_owner(net, name, &bearer_id); 1862 node = tipc_link_find_owner(net, name, &bearer_id);
1897 if (!node) 1863 if (!node)
1898 return -EINVAL; 1864 return -EINVAL;
@@ -2034,9 +2000,9 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
2034 goto attr_msg_full; 2000 goto attr_msg_full;
2035 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) 2001 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
2036 goto attr_msg_full; 2002 goto attr_msg_full;
2037 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) 2003 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->rcv_nxt))
2038 goto attr_msg_full; 2004 goto attr_msg_full;
2039 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->next_out_no)) 2005 if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->snd_nxt))
2040 goto attr_msg_full; 2006 goto attr_msg_full;
2041 2007
2042 if (tipc_link_is_up(link)) 2008 if (tipc_link_is_up(link))
@@ -2175,50 +2141,53 @@ out:
2175int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) 2141int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info)
2176{ 2142{
2177 struct net *net = genl_info_net(info); 2143 struct net *net = genl_info_net(info);
2178 struct sk_buff *ans_skb;
2179 struct tipc_nl_msg msg; 2144 struct tipc_nl_msg msg;
2180 struct tipc_link *link;
2181 struct tipc_node *node;
2182 char *name; 2145 char *name;
2183 int bearer_id;
2184 int err; 2146 int err;
2185 2147
2148 msg.portid = info->snd_portid;
2149 msg.seq = info->snd_seq;
2150
2186 if (!info->attrs[TIPC_NLA_LINK_NAME]) 2151 if (!info->attrs[TIPC_NLA_LINK_NAME])
2187 return -EINVAL; 2152 return -EINVAL;
2188
2189 name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); 2153 name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
2190 node = tipc_link_find_owner(net, name, &bearer_id);
2191 if (!node)
2192 return -EINVAL;
2193 2154
2194 ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 2155 msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2195 if (!ans_skb) 2156 if (!msg.skb)
2196 return -ENOMEM; 2157 return -ENOMEM;
2197 2158
2198 msg.skb = ans_skb; 2159 if (strcmp(name, tipc_bclink_name) == 0) {
2199 msg.portid = info->snd_portid; 2160 err = tipc_nl_add_bc_link(net, &msg);
2200 msg.seq = info->snd_seq; 2161 if (err) {
2201 2162 nlmsg_free(msg.skb);
2202 tipc_node_lock(node); 2163 return err;
2203 link = node->links[bearer_id]; 2164 }
2204 if (!link) { 2165 } else {
2205 err = -EINVAL; 2166 int bearer_id;
2206 goto err_out; 2167 struct tipc_node *node;
2207 } 2168 struct tipc_link *link;
2208
2209 err = __tipc_nl_add_link(net, &msg, link, 0);
2210 if (err)
2211 goto err_out;
2212 2169
2213 tipc_node_unlock(node); 2170 node = tipc_link_find_owner(net, name, &bearer_id);
2171 if (!node)
2172 return -EINVAL;
2214 2173
2215 return genlmsg_reply(ans_skb, info); 2174 tipc_node_lock(node);
2175 link = node->links[bearer_id];
2176 if (!link) {
2177 tipc_node_unlock(node);
2178 nlmsg_free(msg.skb);
2179 return -EINVAL;
2180 }
2216 2181
2217err_out: 2182 err = __tipc_nl_add_link(net, &msg, link, 0);
2218 tipc_node_unlock(node); 2183 tipc_node_unlock(node);
2219 nlmsg_free(ans_skb); 2184 if (err) {
2185 nlmsg_free(msg.skb);
2186 return err;
2187 }
2188 }
2220 2189
2221 return err; 2190 return genlmsg_reply(msg.skb, info);
2222} 2191}
2223 2192
2224int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) 2193int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info)
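The rewritten tipc_nl_link_get() has exactly one allocation point and one reply point: allocate the reply buffer first, fill it (via the broadcast-link helper or under the node lock), free it on any failure, and call genlmsg_reply() once at the end. The control-flow shape, with stand-in stubs for the kernel helpers:

#include <stdlib.h>

/* Stand-ins for nlmsg_new()/__tipc_nl_add_link()/genlmsg_reply(). */
void *reply_new(void)       { return malloc(64); }
int   reply_fill(void *skb) { (void)skb; return 0; }	/* 0 == ok */
int   reply_send(void *skb) { free(skb); return 0; }

int link_get(void)
{
	void *skb = reply_new();
	int err;

	if (!skb)
		return -1;		/* -ENOMEM in the kernel */
	err = reply_fill(skb);
	if (err) {
		free(skb);		/* nlmsg_free() in the kernel */
		return err;
	}
	return reply_send(skb);		/* single success exit */
}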
diff --git a/net/tipc/link.h b/net/tipc/link.h
index b5b4e3554d4e..ae0a0ea572f2 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -107,30 +107,29 @@ struct tipc_stats {
107 * @owner: pointer to peer node 107 * @owner: pointer to peer node
108 * @refcnt: reference counter for permanent references (owner node & timer) 108 * @refcnt: reference counter for permanent references (owner node & timer)
109 * @flags: execution state flags for link endpoint instance 109 * @flags: execution state flags for link endpoint instance
110 * @checkpoint: reference point for triggering link continuity checking
111 * @peer_session: link session # being used by peer end of link 110 * @peer_session: link session # being used by peer end of link
112 * @peer_bearer_id: bearer id used by link's peer endpoint 111 * @peer_bearer_id: bearer id used by link's peer endpoint
113 * @bearer_id: local bearer id used by link 112 * @bearer_id: local bearer id used by link
114 * @tolerance: minimum link continuity loss needed to reset link [in ms] 113 * @tolerance: minimum link continuity loss needed to reset link [in ms]
115 * @cont_intv: link continuity testing interval 114 * @keepalive_intv: link keepalive timer interval
116 * @abort_limit: # of unacknowledged continuity probes needed to reset link 115 * @abort_limit: # of unacknowledged continuity probes needed to reset link
117 * @state: current state of link FSM 116 * @state: current state of link FSM
118 * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state 117 * @silent_intv_cnt: # of timer intervals without any reception from peer
119 * @proto_msg: template for control messages generated by link 118 * @proto_msg: template for control messages generated by link
120 * @pmsg: convenience pointer to "proto_msg" field 119 * @pmsg: convenience pointer to "proto_msg" field
121 * @priority: current link priority 120 * @priority: current link priority
122 * @net_plane: current link network plane ('A' through 'H') 121 * @net_plane: current link network plane ('A' through 'H')
123 * @backlog_limit: backlog queue congestion thresholds (indexed by importance) 122 * @backlog_limit: backlog queue congestion thresholds (indexed by importance)
124 * @exp_msg_count: # of tunnelled messages expected during link changeover 123 * @exp_msg_count: # of tunnelled messages expected during link changeover
125 * @reset_checkpoint: seq # of last acknowledged message at time of link reset 124 * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
126 * @mtu: current maximum packet size for this link 125 * @mtu: current maximum packet size for this link
127 * @advertised_mtu: advertised own mtu when link is being established 126 * @advertised_mtu: advertised own mtu when link is being established
128 * @transmitq: queue for sent, non-acked messages 127 * @transmitq: queue for sent, non-acked messages
129 * @backlogq: queue for messages waiting to be sent 128 * @backlogq: queue for messages waiting to be sent
130 * @next_out_no: next sequence number to use for outbound messages 129 * @snd_nxt: next sequence number to use for outbound messages
131 * @last_retransmitted: sequence number of most recently retransmitted message 130 * @last_retransm: sequence number of most recently retransmitted message
132 * @stale_count: # of identical retransmit requests made by peer 131 * @stale_count: # of identical retransmit requests made by peer
133 * @next_in_no: next sequence number to expect for inbound messages 132 * @rcv_nxt: next sequence number to expect for inbound messages
134 * @deferdq: deferred queue of saved out-of-sequence broadcast messages received from node 133 * @deferdq: deferred queue of saved out-of-sequence broadcast messages received from node
135 * @unacked_window: # of inbound messages rx'd without ack'ing back to peer 134 * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
136 * @inputq: buffer queue for messages to be delivered upwards 135 * @inputq: buffer queue for messages to be delivered upwards
@@ -151,15 +150,14 @@ struct tipc_link {
151 150
152 /* Management and link supervision data */ 151 /* Management and link supervision data */
153 unsigned int flags; 152 unsigned int flags;
154 u32 checkpoint;
155 u32 peer_session; 153 u32 peer_session;
156 u32 peer_bearer_id; 154 u32 peer_bearer_id;
157 u32 bearer_id; 155 u32 bearer_id;
158 u32 tolerance; 156 u32 tolerance;
159 unsigned long cont_intv; 157 unsigned long keepalive_intv;
160 u32 abort_limit; 158 u32 abort_limit;
161 int state; 159 int state;
162 u32 fsm_msg_cnt; 160 u32 silent_intv_cnt;
163 struct { 161 struct {
164 unchar hdr[INT_H_SIZE]; 162 unchar hdr[INT_H_SIZE];
165 unchar body[TIPC_MAX_IF_NAME]; 163 unchar body[TIPC_MAX_IF_NAME];
@@ -185,13 +183,13 @@ struct tipc_link {
185 u16 len; 183 u16 len;
186 u16 limit; 184 u16 limit;
187 } backlog[5]; 185 } backlog[5];
188 u32 next_out_no; 186 u16 snd_nxt;
187 u16 last_retransm;
189 u32 window; 188 u32 window;
190 u32 last_retransmitted;
191 u32 stale_count; 189 u32 stale_count;
192 190
193 /* Reception */ 191 /* Reception */
194 u32 next_in_no; 192 u16 rcv_nxt;
195 u32 rcv_unacked; 193 u32 rcv_unacked;
196 struct sk_buff_head deferdq; 194 struct sk_buff_head deferdq;
197 struct sk_buff_head inputq; 195 struct sk_buff_head inputq;
@@ -213,17 +211,16 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
213 struct tipc_bearer *b_ptr, 211 struct tipc_bearer *b_ptr,
214 const struct tipc_media_addr *media_addr); 212 const struct tipc_media_addr *media_addr);
215void tipc_link_delete(struct tipc_link *link); 213void tipc_link_delete(struct tipc_link *link);
216void tipc_link_delete_list(struct net *net, unsigned int bearer_id, 214void tipc_link_delete_list(struct net *net, unsigned int bearer_id);
217 bool shutting_down);
218void tipc_link_failover_send_queue(struct tipc_link *l_ptr); 215void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
219void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); 216void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
220void tipc_link_reset_fragments(struct tipc_link *l_ptr); 217void tipc_link_reset_fragments(struct tipc_link *l_ptr);
221int tipc_link_is_up(struct tipc_link *l_ptr); 218int tipc_link_is_up(struct tipc_link *l_ptr);
222int tipc_link_is_active(struct tipc_link *l_ptr); 219int tipc_link_is_active(struct tipc_link *l_ptr);
223void tipc_link_purge_queues(struct tipc_link *l_ptr); 220void tipc_link_purge_queues(struct tipc_link *l_ptr);
221void tipc_link_purge_backlog(struct tipc_link *l);
224void tipc_link_reset_all(struct tipc_node *node); 222void tipc_link_reset_all(struct tipc_node *node);
225void tipc_link_reset(struct tipc_link *l_ptr); 223void tipc_link_reset(struct tipc_link *l_ptr);
226void tipc_link_reset_list(struct net *net, unsigned int bearer_id);
227int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, 224int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
228 u32 selector); 225 u32 selector);
229int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, 226int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest,
@@ -247,39 +244,6 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
247int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); 244int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
248void link_prepare_wakeup(struct tipc_link *l); 245void link_prepare_wakeup(struct tipc_link *l);
249 246
250/*
251 * Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
252 */
253static inline u32 buf_seqno(struct sk_buff *buf)
254{
255 return msg_seqno(buf_msg(buf));
256}
257
258static inline u32 mod(u32 x)
259{
260 return x & 0xffffu;
261}
262
263static inline int less_eq(u32 left, u32 right)
264{
265 return mod(right - left) < 32768u;
266}
267
268static inline int more(u32 left, u32 right)
269{
270 return !less_eq(left, right);
271}
272
273static inline int less(u32 left, u32 right)
274{
275 return less_eq(left, right) && (mod(right) != mod(left));
276}
277
278static inline u32 lesser(u32 left, u32 right)
279{
280 return less_eq(left, right) ? left : right;
281}
282
283static inline u32 link_own_addr(struct tipc_link *l) 247static inline u32 link_own_addr(struct tipc_link *l)
284{ 248{
285 return msg_prevnode(l->pmsg); 249 return msg_prevnode(l->pmsg);
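The comparison helpers removed above implement serial-number arithmetic modulo 2^16, the same idea as RFC 1982: left is "less than or equal to" right when the forward distance from left to right is under half the sequence space. With the seqno fields narrowed to u16 (and buf_seqno() moved to msg.h), plain unsigned wrap gives the same behaviour. A standalone rendering:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* left <= right iff the forward distance left -> right is < 2^15. */
bool less_eq16(uint16_t left, uint16_t right)
{
	return (uint16_t)(right - left) < 0x8000u;
}

bool less16(uint16_t left, uint16_t right)
{
	return less_eq16(left, right) && left != right;
}

int main(void)
{
	assert(less16(0xfffe, 2));	/* comparison survives wrap-around */
	assert(!less16(2, 0xfffe));
	assert(less_eq16(7, 7));
	return 0;
}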
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c3e96e815418..08b4cc7d496d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -331,16 +331,15 @@ error:
331 331
332/** 332/**
333 * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one 333 * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
334 * @bskb: the buffer to append to ("bundle") 334 * @skb: the buffer to append to ("bundle")
335 * @skb: buffer to be appended 335 * @msg: message to be appended
336 * @mtu: max allowable size for the bundle buffer 336 * @mtu: max allowable size for the bundle buffer
337 * Consumes buffer if successful 337 * Consumes buffer if successful
338 * Returns true if bundling could be performed, otherwise false 338 * Returns true if bundling could be performed, otherwise false
339 */ 339 */
340bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) 340bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
341{ 341{
342 struct tipc_msg *bmsg; 342 struct tipc_msg *bmsg;
343 struct tipc_msg *msg = buf_msg(skb);
344 unsigned int bsz; 343 unsigned int bsz;
345 unsigned int msz = msg_size(msg); 344 unsigned int msz = msg_size(msg);
346 u32 start, pad; 345 u32 start, pad;
@@ -348,9 +347,9 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
348 347
349 if (likely(msg_user(msg) == MSG_FRAGMENTER)) 348 if (likely(msg_user(msg) == MSG_FRAGMENTER))
350 return false; 349 return false;
351 if (!bskb) 350 if (!skb)
352 return false; 351 return false;
353 bmsg = buf_msg(bskb); 352 bmsg = buf_msg(skb);
354 bsz = msg_size(bmsg); 353 bsz = msg_size(bmsg);
355 start = align(bsz); 354 start = align(bsz);
356 pad = start - bsz; 355 pad = start - bsz;
@@ -359,18 +358,20 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
359 return false; 358 return false;
360 if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) 359 if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
361 return false; 360 return false;
362 if (likely(msg_user(bmsg) != MSG_BUNDLER)) 361 if (unlikely(msg_user(bmsg) != MSG_BUNDLER))
363 return false; 362 return false;
364 if (unlikely(skb_tailroom(bskb) < (pad + msz))) 363 if (unlikely(skb_tailroom(skb) < (pad + msz)))
365 return false; 364 return false;
366 if (unlikely(max < (start + msz))) 365 if (unlikely(max < (start + msz)))
367 return false; 366 return false;
367 if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
368 (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
369 return false;
368 370
369 skb_put(bskb, pad + msz); 371 skb_put(skb, pad + msz);
370 skb_copy_to_linear_data_offset(bskb, start, skb->data, msz); 372 skb_copy_to_linear_data_offset(skb, start, msg, msz);
371 msg_set_size(bmsg, start + msz); 373 msg_set_size(bmsg, start + msz);
372 msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); 374 msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
373 kfree_skb(skb);
374 return true; 375 return true;
375} 376}
376 377
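tipc_msg_bundle() now takes the inner message header directly and copies from it, but the mechanics are unchanged: pad the bundle's current contents to a 4-byte boundary, check tailroom and the MTU cap, append, then bump the bundle's size and message count. The layout logic in isolation, with the header fields reduced to plain counters:

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Pad to a 4-byte boundary, as TIPC's align() does. */
size_t align4(size_t n)
{
	return (n + 3u) & ~(size_t)3;
}

struct bundle {
	unsigned char buf[1500];	/* bundle buffer, capped at MTU */
	size_t size;			/* bytes used so far */
	unsigned int msgcnt;		/* messages bundled so far */
};

/* Append msg[0..msz) if it fits after padding; the same checks as
 * tipc_msg_bundle(): alignment, tailroom, and total-size limit. */
bool bundle_append(struct bundle *b, const void *msg, size_t msz, size_t mtu)
{
	size_t start = align4(b->size);

	if (start + msz > mtu || start + msz > sizeof(b->buf))
		return false;
	memcpy(b->buf + start, msg, msz);
	b->size = start + msz;
	b->msgcnt++;
	return true;
}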
@@ -416,18 +417,18 @@ none:
416 417
417/** 418/**
418 * tipc_msg_make_bundle(): Create bundle buf and append message to its tail 419 * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
419 * @list: the buffer chain 420 * @list: the buffer chain, where head is the buffer to replace/append
420 * @skb: buffer to be appended and replaced 421 * @skb: buffer to be created, appended to and returned in case of success
422 * @msg: message to be appended
421 * @mtu: max allowable size for the bundle buffer, inclusive header 423 * @mtu: max allowable size for the bundle buffer, inclusive header
422 * @dnode: destination node for message. (Not always present in header) 424 * @dnode: destination node for message. (Not always present in header)
423 * Replaces buffer if successful
424 * Returns true if success, otherwise false 425 * Returns true if success, otherwise false
425 */ 426 */
426bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) 427bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
428 u32 mtu, u32 dnode)
427{ 429{
428 struct sk_buff *bskb; 430 struct sk_buff *_skb;
429 struct tipc_msg *bmsg; 431 struct tipc_msg *bmsg;
430 struct tipc_msg *msg = buf_msg(*skb);
431 u32 msz = msg_size(msg); 432 u32 msz = msg_size(msg);
432 u32 max = mtu - INT_H_SIZE; 433 u32 max = mtu - INT_H_SIZE;
433 434
@@ -440,19 +441,23 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode)
440 if (msz > (max / 2)) 441 if (msz > (max / 2))
441 return false; 442 return false;
442 443
443 bskb = tipc_buf_acquire(max); 444 _skb = tipc_buf_acquire(max);
444 if (!bskb) 445 if (!_skb)
445 return false; 446 return false;
446 447
447 skb_trim(bskb, INT_H_SIZE); 448 skb_trim(_skb, INT_H_SIZE);
448 bmsg = buf_msg(bskb); 449 bmsg = buf_msg(_skb);
449 tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, 450 tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
450 INT_H_SIZE, dnode); 451 INT_H_SIZE, dnode);
452 if (msg_isdata(msg))
453 msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE);
454 else
455 msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE);
451 msg_set_seqno(bmsg, msg_seqno(msg)); 456 msg_set_seqno(bmsg, msg_seqno(msg));
452 msg_set_ack(bmsg, msg_ack(msg)); 457 msg_set_ack(bmsg, msg_ack(msg));
453 msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); 458 msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
454 tipc_msg_bundle(bskb, *skb, mtu); 459 tipc_msg_bundle(_skb, msg, mtu);
455 *skb = bskb; 460 *skb = _skb;
456 return true; 461 return true;
457} 462}
458 463
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index e1d3595e2ee9..19c45fb66238 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -313,12 +313,12 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n)
313 msg_set_bits(m, 1, 19, 0x3, n); 313 msg_set_bits(m, 1, 19, 0x3, n);
314} 314}
315 315
316static inline u32 msg_bcast_ack(struct tipc_msg *m) 316static inline u16 msg_bcast_ack(struct tipc_msg *m)
317{ 317{
318 return msg_bits(m, 1, 0, 0xffff); 318 return msg_bits(m, 1, 0, 0xffff);
319} 319}
320 320
321static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n) 321static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n)
322{ 322{
323 msg_set_bits(m, 1, 0, 0xffff, n); 323 msg_set_bits(m, 1, 0, 0xffff, n);
324} 324}
@@ -327,22 +327,22 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n)
327/* 327/*
328 * Word 2 328 * Word 2
329 */ 329 */
330static inline u32 msg_ack(struct tipc_msg *m) 330static inline u16 msg_ack(struct tipc_msg *m)
331{ 331{
332 return msg_bits(m, 2, 16, 0xffff); 332 return msg_bits(m, 2, 16, 0xffff);
333} 333}
334 334
335static inline void msg_set_ack(struct tipc_msg *m, u32 n) 335static inline void msg_set_ack(struct tipc_msg *m, u16 n)
336{ 336{
337 msg_set_bits(m, 2, 16, 0xffff, n); 337 msg_set_bits(m, 2, 16, 0xffff, n);
338} 338}
339 339
340static inline u32 msg_seqno(struct tipc_msg *m) 340static inline u16 msg_seqno(struct tipc_msg *m)
341{ 341{
342 return msg_bits(m, 2, 0, 0xffff); 342 return msg_bits(m, 2, 0, 0xffff);
343} 343}
344 344
345static inline void msg_set_seqno(struct tipc_msg *m, u32 n) 345static inline void msg_set_seqno(struct tipc_msg *m, u16 n)
346{ 346{
347 msg_set_bits(m, 2, 0, 0xffff, n); 347 msg_set_bits(m, 2, 0, 0xffff, n);
348} 348}
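Word 2 of the TIPC header packs two independent 16-bit counters, the acknowledged seqno in bits 31..16 and the packet's own seqno in bits 15..0, which is why these accessors can switch to u16 without touching the wire format. A sketch of the msg_bits()/msg_set_bits() pattern over a host-order word; the kernel versions also convert to and from network byte order, omitted here:

#include <stdint.h>

/* Extract a mask-wide field at bit position pos of a header word. */
uint32_t msg_bits(uint32_t w, int pos, uint32_t mask)
{
	return (w >> pos) & mask;
}

uint32_t msg_set_bits(uint32_t w, int pos, uint32_t mask, uint32_t val)
{
	w &= ~(mask << pos);		/* clear the field */
	return w | ((val & mask) << pos);
}

/* Word 2 layout: ack in bits 31..16, seqno in bits 15..0. */
uint16_t msg_ack(uint32_t w2)	{ return (uint16_t)msg_bits(w2, 16, 0xffff); }
uint16_t msg_seqno(uint32_t w2)	{ return (uint16_t)msg_bits(w2, 0, 0xffff); }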
@@ -352,18 +352,22 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n)
352 */ 352 */
353static inline u32 msg_importance(struct tipc_msg *m) 353static inline u32 msg_importance(struct tipc_msg *m)
354{ 354{
355 if (unlikely(msg_user(m) == MSG_FRAGMENTER)) 355 int usr = msg_user(m);
356
357 if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m)))
358 return usr;
359 if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))
356 return msg_bits(m, 5, 13, 0x7); 360 return msg_bits(m, 5, 13, 0x7);
357 if (likely(msg_isdata(m) && !msg_errcode(m)))
358 return msg_user(m);
359 return TIPC_SYSTEM_IMPORTANCE; 361 return TIPC_SYSTEM_IMPORTANCE;
360} 362}
361 363
362static inline void msg_set_importance(struct tipc_msg *m, u32 i) 364static inline void msg_set_importance(struct tipc_msg *m, u32 i)
363{ 365{
364 if (unlikely(msg_user(m) == MSG_FRAGMENTER)) 366 int usr = msg_user(m);
367
368 if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)))
365 msg_set_bits(m, 5, 13, 0x7, i); 369 msg_set_bits(m, 5, 13, 0x7, i);
366 else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) 370 else if (i < TIPC_SYSTEM_IMPORTANCE)
367 msg_set_user(m, i); 371 msg_set_user(m, i);
368 else 372 else
369 pr_warn("Trying to set illegal importance in message\n"); 373 pr_warn("Trying to set illegal importance in message\n");
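After this change a data message's importance simply is its user field (values 0 through TIPC_CRITICAL_IMPORTANCE), while FRAGMENTER and BUNDLER messages carry their payload's importance in a 3-bit field of word 5; every other internal user reads as SYSTEM importance. Schematically, with the user ids assumed from msg.h:

#include <stdint.h>

enum { TIPC_LOW = 0, TIPC_MEDIUM, TIPC_HIGH, TIPC_CRITICAL,
       TIPC_SYSTEM };				/* SYSTEM: internal only */
enum { MSG_BUNDLER = 6, MSG_FRAGMENTER = 12 };	/* assumed user ids */

struct hdr {
	unsigned int user;	/* word 0 user field */
	unsigned int w5_imp;	/* 3-bit importance field in word 5 */
	unsigned int errcode;
};

unsigned int importance(const struct hdr *m)
{
	if (m->user <= TIPC_CRITICAL && !m->errcode)
		return m->user;		/* data: user field IS importance */
	if (m->user == MSG_FRAGMENTER || m->user == MSG_BUNDLER)
		return m->w5_imp;	/* carries its payload's level */
	return TIPC_SYSTEM;		/* every other internal user */
}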
@@ -772,9 +776,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
772 uint data_sz, u32 dnode, u32 onode, 776 uint data_sz, u32 dnode, u32 onode,
773 u32 dport, u32 oport, int errcode); 777 u32 dport, u32 oport, int errcode);
774int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); 778int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
775bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu); 779bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu);
776 780bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
777bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode); 781 u32 mtu, u32 dnode);
778bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); 782bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
779int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, 783int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
780 int offset, int dsz, int mtu, struct sk_buff_head *list); 784 int offset, int dsz, int mtu, struct sk_buff_head *list);
@@ -782,6 +786,11 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode,
782 int *err); 786 int *err);
783struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); 787struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
784 788
789static inline u16 buf_seqno(struct sk_buff *skb)
790{
791 return msg_seqno(buf_msg(skb));
792}
793
785/* tipc_skb_peek(): peek and reserve first buffer in list 794/* tipc_skb_peek(): peek and reserve first buffer in list
786 * @list: list to be peeked in 795 * @list: list to be peeked in
787 * Returns pointer to first buffer in list, if any 796 * Returns pointer to first buffer in list, if any
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ab0ac62a1287..0f47f08bf38f 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -330,13 +330,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
330 330
331 /* Any subscriptions waiting for notification? */ 331 /* Any subscriptions waiting for notification? */
332 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { 332 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
333 tipc_subscr_report_overlap(s, 333 tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
334 publ->lower, 334 TIPC_PUBLISHED, publ->ref,
335 publ->upper, 335 publ->node, created_subseq);
336 TIPC_PUBLISHED,
337 publ->ref,
338 publ->node,
339 created_subseq);
340 } 336 }
341 return publ; 337 return publ;
342} 338}
@@ -404,13 +400,9 @@ found:
404 400
405 /* Notify any waiting subscriptions */ 401 /* Notify any waiting subscriptions */
406 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { 402 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
407 tipc_subscr_report_overlap(s, 403 tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
408 publ->lower, 404 TIPC_WITHDRAWN, publ->ref,
409 publ->upper, 405 publ->node, removed_subseq);
410 TIPC_WITHDRAWN,
411 publ->ref,
412 publ->node,
413 removed_subseq);
414 } 406 }
415 407
416 return publ; 408 return publ;
@@ -432,19 +424,17 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
432 return; 424 return;
433 425
434 while (sseq != &nseq->sseqs[nseq->first_free]) { 426 while (sseq != &nseq->sseqs[nseq->first_free]) {
435 if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { 427 if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) {
436 struct publication *crs; 428 struct publication *crs;
437 struct name_info *info = sseq->info; 429 struct name_info *info = sseq->info;
438 int must_report = 1; 430 int must_report = 1;
439 431
440 list_for_each_entry(crs, &info->zone_list, zone_list) { 432 list_for_each_entry(crs, &info->zone_list, zone_list) {
441 tipc_subscr_report_overlap(s, 433 tipc_subscrp_report_overlap(s, sseq->lower,
442 sseq->lower, 434 sseq->upper,
443 sseq->upper, 435 TIPC_PUBLISHED,
444 TIPC_PUBLISHED, 436 crs->ref, crs->node,
445 crs->ref, 437 must_report);
446 crs->node,
447 must_report);
448 must_report = 0; 438 must_report = 0;
449 } 439 }
450 } 440 }
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a54f3cbe2246..d6d1399ae229 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -40,6 +40,7 @@
40#include "subscr.h" 40#include "subscr.h"
41#include "socket.h" 41#include "socket.h"
42#include "node.h" 42#include "node.h"
43#include "bcast.h"
43 44
44static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { 45static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
45 [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, 46 [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index ce9121e8e990..53e0fee80086 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -55,6 +55,7 @@ struct tipc_nl_compat_msg {
55 int rep_type; 55 int rep_type;
56 int rep_size; 56 int rep_size;
57 int req_type; 57 int req_type;
58 struct net *net;
58 struct sk_buff *rep; 59 struct sk_buff *rep;
59 struct tlv_desc *req; 60 struct tlv_desc *req;
60 struct sock *dst_sk; 61 struct sock *dst_sk;
@@ -68,7 +69,8 @@ struct tipc_nl_compat_cmd_dump {
68 69
69struct tipc_nl_compat_cmd_doit { 70struct tipc_nl_compat_cmd_doit {
70 int (*doit)(struct sk_buff *skb, struct genl_info *info); 71 int (*doit)(struct sk_buff *skb, struct genl_info *info);
71 int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); 72 int (*transcode)(struct tipc_nl_compat_cmd_doit *cmd,
73 struct sk_buff *skb, struct tipc_nl_compat_msg *msg);
72}; 74};
73 75
74static int tipc_skb_tailroom(struct sk_buff *skb) 76static int tipc_skb_tailroom(struct sk_buff *skb)
@@ -281,7 +283,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
281 if (!trans_buf) 283 if (!trans_buf)
282 return -ENOMEM; 284 return -ENOMEM;
283 285
284 err = (*cmd->transcode)(trans_buf, msg); 286 err = (*cmd->transcode)(cmd, trans_buf, msg);
285 if (err) 287 if (err)
286 goto trans_out; 288 goto trans_out;
287 289
@@ -353,7 +355,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
353 nla_len(bearer[TIPC_NLA_BEARER_NAME])); 355 nla_len(bearer[TIPC_NLA_BEARER_NAME]));
354} 356}
355 357
356static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, 358static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
359 struct sk_buff *skb,
357 struct tipc_nl_compat_msg *msg) 360 struct tipc_nl_compat_msg *msg)
358{ 361{
359 struct nlattr *prop; 362 struct nlattr *prop;
@@ -385,7 +388,8 @@ static int tipc_nl_compat_bearer_enable(struct sk_buff *skb,
385 return 0; 388 return 0;
386} 389}
387 390
388static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, 391static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
392 struct sk_buff *skb,
389 struct tipc_nl_compat_msg *msg) 393 struct tipc_nl_compat_msg *msg)
390{ 394{
391 char *name; 395 char *name;
@@ -576,11 +580,81 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
576 &link_info, sizeof(link_info)); 580 &link_info, sizeof(link_info));
577} 581}
578 582
579static int tipc_nl_compat_link_set(struct sk_buff *skb, 583static int __tipc_add_link_prop(struct sk_buff *skb,
580 struct tipc_nl_compat_msg *msg) 584 struct tipc_nl_compat_msg *msg,
585 struct tipc_link_config *lc)
586{
587 switch (msg->cmd) {
588 case TIPC_CMD_SET_LINK_PRI:
589 return nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value));
590 case TIPC_CMD_SET_LINK_TOL:
591 return nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value));
592 case TIPC_CMD_SET_LINK_WINDOW:
593 return nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value));
594 }
595
596 return -EINVAL;
597}
598
599static int tipc_nl_compat_media_set(struct sk_buff *skb,
600 struct tipc_nl_compat_msg *msg)
581{ 601{
582 struct nlattr *link;
583 struct nlattr *prop; 602 struct nlattr *prop;
603 struct nlattr *media;
604 struct tipc_link_config *lc;
605
606 lc = (struct tipc_link_config *)TLV_DATA(msg->req);
607
608 media = nla_nest_start(skb, TIPC_NLA_MEDIA);
609 if (!media)
610 return -EMSGSIZE;
611
612 if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name))
613 return -EMSGSIZE;
614
615 prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP);
616 if (!prop)
617 return -EMSGSIZE;
618
619 __tipc_add_link_prop(skb, msg, lc);
620 nla_nest_end(skb, prop);
621 nla_nest_end(skb, media);
622
623 return 0;
624}
625
626static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
627 struct tipc_nl_compat_msg *msg)
628{
629 struct nlattr *prop;
630 struct nlattr *bearer;
631 struct tipc_link_config *lc;
632
633 lc = (struct tipc_link_config *)TLV_DATA(msg->req);
634
635 bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
636 if (!bearer)
637 return -EMSGSIZE;
638
639 if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name))
640 return -EMSGSIZE;
641
642 prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP);
643 if (!prop)
644 return -EMSGSIZE;
645
646 __tipc_add_link_prop(skb, msg, lc);
647 nla_nest_end(skb, prop);
648 nla_nest_end(skb, bearer);
649
650 return 0;
651}
652
653static int __tipc_nl_compat_link_set(struct sk_buff *skb,
654 struct tipc_nl_compat_msg *msg)
655{
656 struct nlattr *prop;
657 struct nlattr *link;
584 struct tipc_link_config *lc; 658 struct tipc_link_config *lc;
585 659
586 lc = (struct tipc_link_config *)TLV_DATA(msg->req); 660 lc = (struct tipc_link_config *)TLV_DATA(msg->req);
@@ -596,24 +670,40 @@ static int tipc_nl_compat_link_set(struct sk_buff *skb,
596 if (!prop) 670 if (!prop)
597 return -EMSGSIZE; 671 return -EMSGSIZE;
598 672
599 if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { 673 __tipc_add_link_prop(skb, msg, lc);
600 if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value)))
601 return -EMSGSIZE;
602 } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) {
603 if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value)))
604 return -EMSGSIZE;
605 } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) {
606 if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value)))
607 return -EMSGSIZE;
608 }
609
610 nla_nest_end(skb, prop); 674 nla_nest_end(skb, prop);
611 nla_nest_end(skb, link); 675 nla_nest_end(skb, link);
612 676
613 return 0; 677 return 0;
614} 678}
615 679
616static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, 680static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
681 struct sk_buff *skb,
682 struct tipc_nl_compat_msg *msg)
683{
684 struct tipc_link_config *lc;
685 struct tipc_bearer *bearer;
686 struct tipc_media *media;
687
688 lc = (struct tipc_link_config *)TLV_DATA(msg->req);
689
690 media = tipc_media_find(lc->name);
691 if (media) {
692 cmd->doit = &tipc_nl_media_set;
693 return tipc_nl_compat_media_set(skb, msg);
694 }
695
696 bearer = tipc_bearer_find(msg->net, lc->name);
697 if (bearer) {
698 cmd->doit = &tipc_nl_bearer_set;
699 return tipc_nl_compat_bearer_set(skb, msg);
700 }
701
702 return __tipc_nl_compat_link_set(skb, msg);
703}
704
705static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
706 struct sk_buff *skb,
617 struct tipc_nl_compat_msg *msg) 707 struct tipc_nl_compat_msg *msg)
618{ 708{
619 char *name; 709 char *name;
@@ -851,7 +941,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg,
851 sizeof(node_info)); 941 sizeof(node_info));
852} 942}
853 943
854static int tipc_nl_compat_net_set(struct sk_buff *skb, 944static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd,
945 struct sk_buff *skb,
855 struct tipc_nl_compat_msg *msg) 946 struct tipc_nl_compat_msg *msg)
856{ 947{
857 u32 val; 948 u32 val;
@@ -1007,7 +1098,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
1007 struct nlmsghdr *req_nlh; 1098 struct nlmsghdr *req_nlh;
1008 struct nlmsghdr *rep_nlh; 1099 struct nlmsghdr *rep_nlh;
1009 struct tipc_genlmsghdr *req_userhdr = info->userhdr; 1100 struct tipc_genlmsghdr *req_userhdr = info->userhdr;
1010 struct net *net = genl_info_net(info);
1011 1101
1012 memset(&msg, 0, sizeof(msg)); 1102 memset(&msg, 0, sizeof(msg));
1013 1103
@@ -1015,6 +1105,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
1015 msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; 1105 msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
1016 msg.cmd = req_userhdr->cmd; 1106 msg.cmd = req_userhdr->cmd;
1017 msg.dst_sk = info->dst_sk; 1107 msg.dst_sk = info->dst_sk;
1108 msg.net = genl_info_net(info);
1018 1109
1019 if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { 1110 if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
1020 msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); 1111 msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
@@ -1030,7 +1121,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
1030 } 1121 }
1031 1122
1032 err = tipc_nl_compat_handle(&msg); 1123 err = tipc_nl_compat_handle(&msg);
1033 if (err == -EOPNOTSUPP) 1124 if ((err == -EOPNOTSUPP) || (err == -EPERM))
1034 msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); 1125 msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
1035 else if (err == -EINVAL) 1126 else if (err == -EINVAL)
1036 msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); 1127 msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR);
@@ -1043,7 +1134,7 @@ send:
1043 rep_nlh = nlmsg_hdr(msg.rep); 1134 rep_nlh = nlmsg_hdr(msg.rep);
1044 memcpy(rep_nlh, info->nlhdr, len); 1135 memcpy(rep_nlh, info->nlhdr, len);
1045 rep_nlh->nlmsg_len = msg.rep->len; 1136 rep_nlh->nlmsg_len = msg.rep->len;
1046 genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); 1137 genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid);
1047 1138
1048 return err; 1139 return err;
1049} 1140}
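
The netlink_compat.c hunks above extend the transcode callback to take its own tipc_nl_compat_cmd_doit, so tipc_nl_compat_link_set() can retarget cmd->doit once it learns whether the legacy link name really denotes a media or a bearer. A minimal userspace sketch of that dispatch pattern, assuming hypothetical names and no kernel APIs:

#include <stdio.h>
#include <string.h>

struct msg { const char *name; };
struct cmd {
	int (*doit)(struct msg *m);
	int (*transcode)(struct cmd *c, struct msg *m);
};

static int set_link(struct msg *m)   { printf("link set: %s\n", m->name); return 0; }
static int set_bearer(struct msg *m) { printf("bearer set: %s\n", m->name); return 0; }

/* transcode inspects the request and may retarget the doit handler */
static int transcode_link_set(struct cmd *c, struct msg *m)
{
	if (!strncmp(m->name, "eth:", 4))
		c->doit = set_bearer;	/* name turned out to be a bearer */
	return 0;
}

static int run(struct cmd *c, struct msg *m)
{
	int err = c->transcode(c, m);	/* may rewrite c->doit ... */
	return err ? err : c->doit(m);	/* ... before it is invoked */
}

int main(void)
{
	struct cmd c = { .doit = set_link, .transcode = transcode_link_set };
	struct msg m = { .name = "eth:data0" };
	return run(&c, &m);		/* prints "bearer set: eth:data0" */
}

This mirrors why __tipc_nl_compat_doit() now passes cmd through to the transcode step: without it, transcoding would have no way to influence which doit handler runs.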
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 22c059ad2999..0b1d61a5f853 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/node.c: TIPC node management routines 2 * net/tipc/node.c: TIPC node management routines
3 * 3 *
4 * Copyright (c) 2000-2006, 2012-2014, Ericsson AB 4 * Copyright (c) 2000-2006, 2012-2015, Ericsson AB
5 * Copyright (c) 2005-2006, 2010-2014, Wind River Systems 5 * Copyright (c) 2005-2006, 2010-2014, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -39,6 +39,7 @@
39#include "node.h" 39#include "node.h"
40#include "name_distr.h" 40#include "name_distr.h"
41#include "socket.h" 41#include "socket.h"
42#include "bcast.h"
42 43
43static void node_lost_contact(struct tipc_node *n_ptr); 44static void node_lost_contact(struct tipc_node *n_ptr);
44static void node_established_contact(struct tipc_node *n_ptr); 45static void node_established_contact(struct tipc_node *n_ptr);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 02d5c20dc551..5a834cf142c8 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,8 +45,6 @@
45/* Out-of-range value for node signature */ 45/* Out-of-range value for node signature */
46#define INVALID_NODE_SIG 0x10000 46#define INVALID_NODE_SIG 0x10000
47 47
48#define NODE_HTABLE_SIZE 512
49
50/* Flags used to take different actions according to flag type 48/* Flags used to take different actions according to flag type
51 * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down 49 * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
52 * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down 50 * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 77ff03ed1e18..922e04a43396 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -309,6 +309,10 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
309 309
310 /* Notify that new connection is incoming */ 310 /* Notify that new connection is incoming */
311 newcon->usr_data = s->tipc_conn_new(newcon->conid); 311 newcon->usr_data = s->tipc_conn_new(newcon->conid);
312 if (!newcon->usr_data) {
313 sock_release(newsock);
314 return -ENOMEM;
315 }
312 316
313 /* Wake up receive process in case of 'SYN+' message */ 317 /* Wake up receive process in case of 'SYN+' message */
314 newsock->sk->sk_data_ready(newsock->sk); 318 newsock->sk->sk_data_ready(newsock->sk);
@@ -321,7 +325,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
321 struct socket *sock = NULL; 325 struct socket *sock = NULL;
322 int ret; 326 int ret;
323 327
324 ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); 328 ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
325 if (ret < 0) 329 if (ret < 0)
326 return NULL; 330 return NULL;
327 ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, 331 ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
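
The accept fix above releases the freshly accepted socket when per-connection state allocation fails, instead of continuing with a NULL usr_data. It follows the usual unwind shape: undo, in reverse order, only what already succeeded. A hypothetical userspace analogue:

#include <stdlib.h>

struct conn { void *sock; void *usr_data; };

static int accept_conn(struct conn *c)
{
	c->sock = malloc(64);		/* stands in for the accepted socket */
	if (!c->sock)
		return -1;

	c->usr_data = malloc(64);	/* stands in for tipc_conn_new() */
	if (!c->usr_data)
		goto err_sock;		/* undo only what already succeeded */

	return 0;

err_sock:
	free(c->sock);
	c->sock = NULL;
	return -1;			/* mirrors the -ENOMEM above */
}

int main(void)
{
	struct conn c = { 0 };
	int err = accept_conn(&c);

	free(c.usr_data);
	free(c.sock);
	return err ? 1 : 0;
}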
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f485600c4507..3a7567f690f3 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -41,6 +41,7 @@
41#include "link.h" 41#include "link.h"
42#include "name_distr.h" 42#include "name_distr.h"
43#include "socket.h" 43#include "socket.h"
44#include "bcast.h"
44 45
45#define SS_LISTENING -1 /* socket is listening */ 46#define SS_LISTENING -1 /* socket is listening */
46#define SS_READY -2 /* socket is connectionless */ 47#define SS_READY -2 /* socket is connectionless */
@@ -342,7 +343,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
342 } 343 }
343 344
344 /* Allocate socket's protocol area */ 345 /* Allocate socket's protocol area */
345 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); 346 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
346 if (sk == NULL) 347 if (sk == NULL)
347 return -ENOMEM; 348 return -ENOMEM;
348 349
@@ -409,7 +410,7 @@ static int tipc_release(struct socket *sock)
409 struct net *net; 410 struct net *net;
410 struct tipc_sock *tsk; 411 struct tipc_sock *tsk;
411 struct sk_buff *skb; 412 struct sk_buff *skb;
412 u32 dnode, probing_state; 413 u32 dnode;
413 414
414 /* 415 /*
415 * Exit if socket isn't fully initialized (occurs when a failed accept() 416 * Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -447,10 +448,7 @@ static int tipc_release(struct socket *sock)
447 } 448 }
448 449
449 tipc_sk_withdraw(tsk, 0, NULL); 450 tipc_sk_withdraw(tsk, 0, NULL);
450 probing_state = tsk->probing_state; 451 sk_stop_timer(sk, &sk->sk_timer);
451 if (del_timer_sync(&sk->sk_timer) &&
452 probing_state != TIPC_CONN_PROBING)
453 sock_put(sk);
454 tipc_sk_remove(tsk); 452 tipc_sk_remove(tsk);
455 if (tsk->connected) { 453 if (tsk->connected) {
456 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, 454 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
@@ -2009,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
2009 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); 2007 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
2010 if (res) 2008 if (res)
2011 goto exit; 2009 goto exit;
2010 security_sk_clone(sock->sk, new_sock->sk);
2012 2011
2013 new_sk = new_sock->sk; 2012 new_sk = new_sock->sk;
2014 new_tsock = tipc_sk(new_sk); 2013 new_tsock = tipc_sk(new_sk);
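
In the tipc_release() hunk above, sk_stop_timer() replaces an open-coded del_timer_sync()/sock_put() pair. The point is reference accounting: a pending sk_timer holds one reference on the socket, and cancelling it must drop that reference exactly when a pending timer was in fact removed. A hypothetical userspace model of the idiom:

#include <stdbool.h>
#include <stdio.h>

struct sock { int refcnt; bool timer_pending; };

static bool del_timer_model(struct sock *sk)	/* true iff it was pending */
{
	bool was_pending = sk->timer_pending;

	sk->timer_pending = false;
	return was_pending;
}

static void sock_put_model(struct sock *sk) { sk->refcnt--; }

/* what sk_stop_timer() encapsulates */
static void stop_timer(struct sock *sk)
{
	if (del_timer_model(sk))
		sock_put_model(sk);	/* drop the reference the timer held */
}

int main(void)
{
	struct sock sk = { .refcnt = 2, .timer_pending = true };

	stop_timer(&sk);
	printf("refcnt=%d\n", sk.refcnt);	/* 1: only the caller's ref left */
	return 0;
}

The removed code conditioned the put on probing_state instead, which is exactly the kind of ad-hoc test the helper exists to avoid.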
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 1c147c869c2e..350cca33ee0a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -40,16 +40,21 @@
40 40
41/** 41/**
42 * struct tipc_subscriber - TIPC network topology subscriber 42 * struct tipc_subscriber - TIPC network topology subscriber
43 * @kref: reference counter to tipc_subscription object
43 * @conid: connection identifier to server connecting to subscriber 44 * @conid: connection identifier to server connecting to subscriber
44 * @lock: control access to subscriber 45 * @lock: control access to subscriber
45 * @subscription_list: list of subscription objects for this subscriber 46 * @subscrp_list: list of subscription objects for this subscriber
46 */ 47 */
47struct tipc_subscriber { 48struct tipc_subscriber {
49 struct kref kref;
48 int conid; 50 int conid;
49 spinlock_t lock; 51 spinlock_t lock;
50 struct list_head subscription_list; 52 struct list_head subscrp_list;
51}; 53};
52 54
55static void tipc_subscrp_delete(struct tipc_subscription *sub);
56static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
57
53/** 58/**
54 * htohl - convert value to endianness used by destination 59 * htohl - convert value to endianness used by destination
55 * @in: value to convert 60 * @in: value to convert
@@ -62,9 +67,9 @@ static u32 htohl(u32 in, int swap)
62 return swap ? swab32(in) : in; 67 return swap ? swab32(in) : in;
63} 68}
64 69
65static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, 70static void tipc_subscrp_send_event(struct tipc_subscription *sub,
66 u32 found_upper, u32 event, u32 port_ref, 71 u32 found_lower, u32 found_upper,
67 u32 node) 72 u32 event, u32 port_ref, u32 node)
68{ 73{
69 struct tipc_net *tn = net_generic(sub->net, tipc_net_id); 74 struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
70 struct tipc_subscriber *subscriber = sub->subscriber; 75 struct tipc_subscriber *subscriber = sub->subscriber;
@@ -82,12 +87,13 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
82} 87}
83 88
84/** 89/**
85 * tipc_subscr_overlap - test for subscription overlap with the given values 90 * tipc_subscrp_check_overlap - test for subscription overlap with the
91 * given values
86 * 92 *
87 * Returns 1 if there is overlap, otherwise 0. 93 * Returns 1 if there is overlap, otherwise 0.
88 */ 94 */
89int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, 95int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
90 u32 found_upper) 96 u32 found_upper)
91{ 97{
92 if (found_lower < sub->seq.lower) 98 if (found_lower < sub->seq.lower)
93 found_lower = sub->seq.lower; 99 found_lower = sub->seq.lower;
@@ -98,138 +104,121 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
98 return 1; 104 return 1;
99} 105}
100 106
101/** 107void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
102 * tipc_subscr_report_overlap - issue event if there is subscription overlap 108 u32 found_upper, u32 event, u32 port_ref,
103 * 109 u32 node, int must)
104 * Protected by nameseq.lock in name_table.c
105 */
106void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
107 u32 found_upper, u32 event, u32 port_ref,
108 u32 node, int must)
109{ 110{
110 if (!tipc_subscr_overlap(sub, found_lower, found_upper)) 111 if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper))
111 return; 112 return;
112 if (!must && !(sub->filter & TIPC_SUB_PORTS)) 113 if (!must && !(sub->filter & TIPC_SUB_PORTS))
113 return; 114 return;
114 115
115 subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); 116 tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
117 node);
116} 118}
117 119
118static void subscr_timeout(unsigned long data) 120static void tipc_subscrp_timeout(unsigned long data)
119{ 121{
120 struct tipc_subscription *sub = (struct tipc_subscription *)data; 122 struct tipc_subscription *sub = (struct tipc_subscription *)data;
121 struct tipc_subscriber *subscriber = sub->subscriber; 123 struct tipc_subscriber *subscriber = sub->subscriber;
122 struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
123 124
124 /* The spin lock per subscriber is used to protect its members */ 125 /* Notify subscriber of timeout */
125 spin_lock_bh(&subscriber->lock); 126 tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
127 TIPC_SUBSCR_TIMEOUT, 0, 0);
126 128
127 /* Validate timeout (in case subscription is being cancelled) */ 129 spin_lock_bh(&subscriber->lock);
128 if (sub->timeout == TIPC_WAIT_FOREVER) { 130 tipc_subscrp_delete(sub);
129 spin_unlock_bh(&subscriber->lock); 131 spin_unlock_bh(&subscriber->lock);
130 return;
131 }
132 132
133 /* Unlink subscription from name table */ 133 tipc_subscrb_put(subscriber);
134 tipc_nametbl_unsubscribe(sub); 134}
135 135
136 /* Unlink subscription from subscriber */ 136static void tipc_subscrb_kref_release(struct kref *kref)
137 list_del(&sub->subscription_list); 137{
 138 struct tipc_subscriber *subscriber = container_of(kref,
139 struct tipc_subscriber, kref);
138 140
 139 spin_unlock_bh(&subscriber->lock); 141 kfree(subscriber);
142}
140 143
141 /* Notify subscriber of timeout */ 144static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
142 subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, 145{
143 TIPC_SUBSCR_TIMEOUT, 0, 0); 146 kref_put(&subscriber->kref, tipc_subscrb_kref_release);
147}
144 148
145 /* Now destroy subscription */ 149static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
146 kfree(sub); 150{
147 atomic_dec(&tn->subscription_count); 151 kref_get(&subscriber->kref);
148} 152}
149 153
150/** 154static struct tipc_subscriber *tipc_subscrb_create(int conid)
151 * subscr_del - delete a subscription within a subscription list
152 *
153 * Called with subscriber lock held.
154 */
155static void subscr_del(struct tipc_subscription *sub)
156{ 155{
157 struct tipc_net *tn = net_generic(sub->net, tipc_net_id); 156 struct tipc_subscriber *subscriber;
158 157
159 tipc_nametbl_unsubscribe(sub); 158 subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
160 list_del(&sub->subscription_list); 159 if (!subscriber) {
161 kfree(sub); 160 pr_warn("Subscriber rejected, no memory\n");
162 atomic_dec(&tn->subscription_count); 161 return NULL;
162 }
163 kref_init(&subscriber->kref);
164 INIT_LIST_HEAD(&subscriber->subscrp_list);
165 subscriber->conid = conid;
166 spin_lock_init(&subscriber->lock);
167
168 return subscriber;
163} 169}
164 170
165static void subscr_release(struct tipc_subscriber *subscriber) 171static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
166{ 172{
167 struct tipc_subscription *sub; 173 struct tipc_subscription *sub, *temp;
168 struct tipc_subscription *sub_temp;
169 174
170 spin_lock_bh(&subscriber->lock); 175 spin_lock_bh(&subscriber->lock);
171
172 /* Destroy any existing subscriptions for subscriber */ 176 /* Destroy any existing subscriptions for subscriber */
173 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, 177 list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
174 subscription_list) { 178 subscrp_list) {
175 if (sub->timeout != TIPC_WAIT_FOREVER) { 179 if (del_timer(&sub->timer)) {
176 spin_unlock_bh(&subscriber->lock); 180 tipc_subscrp_delete(sub);
177 del_timer_sync(&sub->timer); 181 tipc_subscrb_put(subscriber);
178 spin_lock_bh(&subscriber->lock);
179 } 182 }
180 subscr_del(sub);
181 } 183 }
182 spin_unlock_bh(&subscriber->lock); 184 spin_unlock_bh(&subscriber->lock);
183 185
184 /* Now destroy subscriber */ 186 tipc_subscrb_put(subscriber);
185 kfree(subscriber);
186} 187}
187 188
188/** 189static void tipc_subscrp_delete(struct tipc_subscription *sub)
189 * subscr_cancel - handle subscription cancellation request
190 *
191 * Called with subscriber lock held. Routine must temporarily release lock
192 * to enable the subscription timeout routine to finish without deadlocking;
193 * the lock is then reclaimed to allow caller to release it upon return.
194 *
195 * Note that fields of 's' use subscriber's endianness!
196 */
197static void subscr_cancel(struct tipc_subscr *s,
198 struct tipc_subscriber *subscriber)
199{ 190{
200 struct tipc_subscription *sub; 191 struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
201 struct tipc_subscription *sub_temp; 192
202 int found = 0; 193 tipc_nametbl_unsubscribe(sub);
194 list_del(&sub->subscrp_list);
195 kfree(sub);
196 atomic_dec(&tn->subscription_count);
197}
203 198
199static void tipc_subscrp_cancel(struct tipc_subscr *s,
200 struct tipc_subscriber *subscriber)
201{
202 struct tipc_subscription *sub, *temp;
203
204 spin_lock_bh(&subscriber->lock);
204 /* Find first matching subscription, exit if not found */ 205 /* Find first matching subscription, exit if not found */
205 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, 206 list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
206 subscription_list) { 207 subscrp_list) {
207 if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { 208 if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
208 found = 1; 209 if (del_timer(&sub->timer)) {
210 tipc_subscrp_delete(sub);
211 tipc_subscrb_put(subscriber);
212 }
209 break; 213 break;
210 } 214 }
211 } 215 }
212 if (!found) 216 spin_unlock_bh(&subscriber->lock);
213 return;
214
215 /* Cancel subscription timer (if used), then delete subscription */
216 if (sub->timeout != TIPC_WAIT_FOREVER) {
217 sub->timeout = TIPC_WAIT_FOREVER;
218 spin_unlock_bh(&subscriber->lock);
219 del_timer_sync(&sub->timer);
220 spin_lock_bh(&subscriber->lock);
221 }
222 subscr_del(sub);
223} 217}
224 218
225/** 219static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s,
226 * subscr_subscribe - create subscription for subscriber 220 struct tipc_subscriber *subscriber,
227 * 221 struct tipc_subscription **sub_p)
228 * Called with subscriber lock held.
229 */
230static int subscr_subscribe(struct net *net, struct tipc_subscr *s,
231 struct tipc_subscriber *subscriber,
232 struct tipc_subscription **sub_p)
233{ 222{
234 struct tipc_net *tn = net_generic(net, tipc_net_id); 223 struct tipc_net *tn = net_generic(net, tipc_net_id);
235 struct tipc_subscription *sub; 224 struct tipc_subscription *sub;
@@ -241,7 +230,7 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s,
241 /* Detect & process a subscription cancellation request */ 230 /* Detect & process a subscription cancellation request */
242 if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { 231 if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
243 s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); 232 s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
244 subscr_cancel(s, subscriber); 233 tipc_subscrp_cancel(s, subscriber);
245 return 0; 234 return 0;
246 } 235 }
247 236
@@ -273,62 +262,51 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s,
273 kfree(sub); 262 kfree(sub);
274 return -EINVAL; 263 return -EINVAL;
275 } 264 }
276 list_add(&sub->subscription_list, &subscriber->subscription_list); 265 spin_lock_bh(&subscriber->lock);
266 list_add(&sub->subscrp_list, &subscriber->subscrp_list);
267 spin_unlock_bh(&subscriber->lock);
277 sub->subscriber = subscriber; 268 sub->subscriber = subscriber;
278 sub->swap = swap; 269 sub->swap = swap;
279 memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); 270 memcpy(&sub->evt.s, s, sizeof(*s));
280 atomic_inc(&tn->subscription_count); 271 atomic_inc(&tn->subscription_count);
281 if (sub->timeout != TIPC_WAIT_FOREVER) { 272 setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
282 setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); 273 if (sub->timeout != TIPC_WAIT_FOREVER)
283 mod_timer(&sub->timer, jiffies + sub->timeout); 274 sub->timeout += jiffies;
284 } 275 if (!mod_timer(&sub->timer, sub->timeout))
276 tipc_subscrb_get(subscriber);
285 *sub_p = sub; 277 *sub_p = sub;
286 return 0; 278 return 0;
287} 279}
288 280
289/* Handle one termination request for the subscriber */ 281/* Handle one termination request for the subscriber */
290static void subscr_conn_shutdown_event(int conid, void *usr_data) 282static void tipc_subscrb_shutdown_cb(int conid, void *usr_data)
291{ 283{
292 subscr_release((struct tipc_subscriber *)usr_data); 284 tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
293} 285}
294 286
295/* Handle one request to create a new subscription for the subscriber */ 287/* Handle one request to create a new subscription for the subscriber */
296static void subscr_conn_msg_event(struct net *net, int conid, 288static void tipc_subscrb_rcv_cb(struct net *net, int conid,
297 struct sockaddr_tipc *addr, void *usr_data, 289 struct sockaddr_tipc *addr, void *usr_data,
298 void *buf, size_t len) 290 void *buf, size_t len)
299{ 291{
300 struct tipc_subscriber *subscriber = usr_data; 292 struct tipc_subscriber *subscriber = usr_data;
301 struct tipc_subscription *sub = NULL; 293 struct tipc_subscription *sub = NULL;
302 struct tipc_net *tn = net_generic(net, tipc_net_id); 294 struct tipc_net *tn = net_generic(net, tipc_net_id);
303 295
304 spin_lock_bh(&subscriber->lock); 296 tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
305 subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub);
306 if (sub) 297 if (sub)
307 tipc_nametbl_subscribe(sub); 298 tipc_nametbl_subscribe(sub);
308 else 299 else
309 tipc_conn_terminate(tn->topsrv, subscriber->conid); 300 tipc_conn_terminate(tn->topsrv, subscriber->conid);
310 spin_unlock_bh(&subscriber->lock);
311} 301}
312 302
313/* Handle one request to establish a new subscriber */ 303/* Handle one request to establish a new subscriber */
314static void *subscr_named_msg_event(int conid) 304static void *tipc_subscrb_connect_cb(int conid)
315{ 305{
316 struct tipc_subscriber *subscriber; 306 return (void *)tipc_subscrb_create(conid);
317
318 /* Create subscriber object */
319 subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC);
320 if (subscriber == NULL) {
321 pr_warn("Subscriber rejected, no memory\n");
322 return NULL;
323 }
324 INIT_LIST_HEAD(&subscriber->subscription_list);
325 subscriber->conid = conid;
326 spin_lock_init(&subscriber->lock);
327
328 return (void *)subscriber;
329} 307}
330 308
331int tipc_subscr_start(struct net *net) 309int tipc_topsrv_start(struct net *net)
332{ 310{
333 struct tipc_net *tn = net_generic(net, tipc_net_id); 311 struct tipc_net *tn = net_generic(net, tipc_net_id);
334 const char name[] = "topology_server"; 312 const char name[] = "topology_server";
@@ -355,9 +333,9 @@ int tipc_subscr_start(struct net *net)
355 topsrv->imp = TIPC_CRITICAL_IMPORTANCE; 333 topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
356 topsrv->type = SOCK_SEQPACKET; 334 topsrv->type = SOCK_SEQPACKET;
357 topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); 335 topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
358 topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; 336 topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
359 topsrv->tipc_conn_new = subscr_named_msg_event; 337 topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
360 topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; 338 topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb;
361 339
362 strncpy(topsrv->name, name, strlen(name) + 1); 340 strncpy(topsrv->name, name, strlen(name) + 1);
363 tn->topsrv = topsrv; 341 tn->topsrv = topsrv;
@@ -366,7 +344,7 @@ int tipc_subscr_start(struct net *net)
366 return tipc_server_start(topsrv); 344 return tipc_server_start(topsrv);
367} 345}
368 346
369void tipc_subscr_stop(struct net *net) 347void tipc_topsrv_stop(struct net *net)
370{ 348{
371 struct tipc_net *tn = net_generic(net, tipc_net_id); 349 struct tipc_net *tn = net_generic(net, tipc_net_id);
372 struct tipc_server *topsrv = tn->topsrv; 350 struct tipc_server *topsrv = tn->topsrv;
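
The subscr.c rewrite above moves the subscriber lifetime onto a kref: the connection holds one reference, each armed timer holds another, and the object is freed from tipc_subscrb_kref_release() via container_of() when the last reference drops. A self-contained userspace rendition of the pattern, assuming hypothetical names and C11 atomics in place of the kernel's kref:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct kref_model { atomic_int refcount; };

static void kref_init_model(struct kref_model *k) { atomic_init(&k->refcount, 1); }
static void kref_get_model(struct kref_model *k)  { atomic_fetch_add(&k->refcount, 1); }
static void kref_put_model(struct kref_model *k,
			   void (*release)(struct kref_model *))
{
	if (atomic_fetch_sub(&k->refcount, 1) == 1)
		release(k);			/* last reference just dropped */
}

#define container_of_model(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct subscriber_model { struct kref_model kref; int conid; };

static void subscriber_release(struct kref_model *k)
{
	struct subscriber_model *s =
		container_of_model(k, struct subscriber_model, kref);

	printf("freeing subscriber %d\n", s->conid);
	free(s);
}

int main(void)
{
	struct subscriber_model *s = calloc(1, sizeof(*s));

	if (!s)
		return 1;
	s->conid = 7;
	kref_init_model(&s->kref);		/* connection's reference */
	kref_get_model(&s->kref);		/* armed timer's reference */
	kref_put_model(&s->kref, subscriber_release);	/* timer fires */
	kref_put_model(&s->kref, subscriber_release);	/* shutdown: frees */
	return 0;
}

Note how both tipc_subscrb_delete() and tipc_subscrp_cancel() only drop the timer's reference when del_timer() confirms the timer was still pending, so a concurrently firing tipc_subscrp_timeout() cannot lead to a double put.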
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 33488bd9fe3c..92ee18cc5fe6 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -54,7 +54,7 @@ struct tipc_subscriber;
54 * @filter: event filtering to be done for subscription 54 * @filter: event filtering to be done for subscription
55 * @timer: timer governing subscription duration (optional) 55 * @timer: timer governing subscription duration (optional)
56 * @nameseq_list: adjacent subscriptions in name sequence's subscription list 56 * @nameseq_list: adjacent subscriptions in name sequence's subscription list
57 * @subscription_list: adjacent subscriptions in subscriber's subscription list 57 * @subscrp_list: adjacent subscriptions in subscriber's subscription list
58 * @server_ref: object reference of server port associated with subscription 58 * @server_ref: object reference of server port associated with subscription
59 * @swap: indicates if subscriber uses opposite endianness in its messages 59 * @swap: indicates if subscriber uses opposite endianness in its messages
60 * @evt: template for events generated by subscription 60 * @evt: template for events generated by subscription
@@ -67,17 +67,17 @@ struct tipc_subscription {
67 u32 filter; 67 u32 filter;
68 struct timer_list timer; 68 struct timer_list timer;
69 struct list_head nameseq_list; 69 struct list_head nameseq_list;
70 struct list_head subscription_list; 70 struct list_head subscrp_list;
71 int swap; 71 int swap;
72 struct tipc_event evt; 72 struct tipc_event evt;
73}; 73};
74 74
75int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, 75int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
76 u32 found_upper); 76 u32 found_upper);
77void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, 77void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
78 u32 found_upper, u32 event, u32 port_ref, 78 u32 found_lower, u32 found_upper, u32 event,
79 u32 node, int must); 79 u32 port_ref, u32 node, int must);
80int tipc_subscr_start(struct net *net); 80int tipc_topsrv_start(struct net *net);
81void tipc_subscr_stop(struct net *net); 81void tipc_topsrv_stop(struct net *net);
82 82
83#endif 83#endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 06430598cf51..03ee4d359f6a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -140,12 +140,17 @@ static struct hlist_head *unix_sockets_unbound(void *addr)
140#ifdef CONFIG_SECURITY_NETWORK 140#ifdef CONFIG_SECURITY_NETWORK
141static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 141static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142{ 142{
143 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); 143 UNIXCB(skb).secid = scm->secid;
144} 144}
145 145
146static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 146static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147{ 147{
148 scm->secid = *UNIXSID(skb); 148 scm->secid = UNIXCB(skb).secid;
149}
150
151static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
152{
153 return (scm->secid == UNIXCB(skb).secid);
149} 154}
150#else 155#else
151static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 156static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
@@ -153,6 +158,11 @@ static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
153 158
154static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 159static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155{ } 160{ }
161
162static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
163{
164 return true;
165}
156#endif /* CONFIG_SECURITY_NETWORK */ 166#endif /* CONFIG_SECURITY_NETWORK */
157 167
158/* 168/*
@@ -518,6 +528,11 @@ static int unix_ioctl(struct socket *, unsigned int, unsigned long);
518static int unix_shutdown(struct socket *, int); 528static int unix_shutdown(struct socket *, int);
519static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 529static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
520static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 530static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
531static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
532 size_t size, int flags);
533static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
534 struct pipe_inode_info *, size_t size,
535 unsigned int flags);
521static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 536static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
522static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 537static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
523static int unix_dgram_connect(struct socket *, struct sockaddr *, 538static int unix_dgram_connect(struct socket *, struct sockaddr *,
@@ -558,7 +573,8 @@ static const struct proto_ops unix_stream_ops = {
558 .sendmsg = unix_stream_sendmsg, 573 .sendmsg = unix_stream_sendmsg,
559 .recvmsg = unix_stream_recvmsg, 574 .recvmsg = unix_stream_recvmsg,
560 .mmap = sock_no_mmap, 575 .mmap = sock_no_mmap,
561 .sendpage = sock_no_sendpage, 576 .sendpage = unix_stream_sendpage,
577 .splice_read = unix_stream_splice_read,
562 .set_peek_off = unix_set_peek_off, 578 .set_peek_off = unix_set_peek_off,
563}; 579};
564 580
@@ -620,7 +636,7 @@ static struct proto unix_proto = {
620 */ 636 */
621static struct lock_class_key af_unix_sk_receive_queue_lock_key; 637static struct lock_class_key af_unix_sk_receive_queue_lock_key;
622 638
623static struct sock *unix_create1(struct net *net, struct socket *sock) 639static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
624{ 640{
625 struct sock *sk = NULL; 641 struct sock *sk = NULL;
626 struct unix_sock *u; 642 struct unix_sock *u;
@@ -629,7 +645,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
629 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) 645 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
630 goto out; 646 goto out;
631 647
632 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); 648 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
633 if (!sk) 649 if (!sk)
634 goto out; 650 goto out;
635 651
@@ -688,7 +704,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
688 return -ESOCKTNOSUPPORT; 704 return -ESOCKTNOSUPPORT;
689 } 705 }
690 706
691 return unix_create1(net, sock) ? 0 : -ENOMEM; 707 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
692} 708}
693 709
694static int unix_release(struct socket *sock) 710static int unix_release(struct socket *sock)
@@ -1088,7 +1104,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1088 err = -ENOMEM; 1104 err = -ENOMEM;
1089 1105
1090 /* create new sock for complete connection */ 1106 /* create new sock for complete connection */
1091 newsk = unix_create1(sock_net(sk), NULL); 1107 newsk = unix_create1(sock_net(sk), NULL, 0);
1092 if (newsk == NULL) 1108 if (newsk == NULL)
1093 goto out; 1109 goto out;
1094 1110
@@ -1408,6 +1424,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
1408 UNIXCB(skb).uid = scm->creds.uid; 1424 UNIXCB(skb).uid = scm->creds.uid;
1409 UNIXCB(skb).gid = scm->creds.gid; 1425 UNIXCB(skb).gid = scm->creds.gid;
1410 UNIXCB(skb).fp = NULL; 1426 UNIXCB(skb).fp = NULL;
1427 unix_get_secdata(scm, skb);
1411 if (scm->fp && send_fds) 1428 if (scm->fp && send_fds)
1412 err = unix_attach_fds(scm, skb); 1429 err = unix_attach_fds(scm, skb);
1413 1430
@@ -1503,7 +1520,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1503 if (err < 0) 1520 if (err < 0)
1504 goto out_free; 1521 goto out_free;
1505 max_level = err + 1; 1522 max_level = err + 1;
1506 unix_get_secdata(&scm, skb);
1507 1523
1508 skb_put(skb, len - data_len); 1524 skb_put(skb, len - data_len);
1509 skb->data_len = data_len; 1525 skb->data_len = data_len;
@@ -1720,6 +1736,101 @@ out_err:
1720 return sent ? : err; 1736 return sent ? : err;
1721} 1737}
1722 1738
1739static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1740 int offset, size_t size, int flags)
1741{
1742 int err = 0;
1743 bool send_sigpipe = true;
1744 struct sock *other, *sk = socket->sk;
1745 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1746
1747 if (flags & MSG_OOB)
1748 return -EOPNOTSUPP;
1749
1750 other = unix_peer(sk);
1751 if (!other || sk->sk_state != TCP_ESTABLISHED)
1752 return -ENOTCONN;
1753
1754 if (false) {
1755alloc_skb:
1756 unix_state_unlock(other);
1757 mutex_unlock(&unix_sk(other)->readlock);
1758 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1759 &err, 0);
1760 if (!newskb)
1761 return err;
1762 }
1763
1764 /* we must acquire readlock as we modify already present
1765 * skbs in the sk_receive_queue and mess with skb->len
1766 */
1767 err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1768 if (err) {
1769 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1770 send_sigpipe = false;
1771 goto err;
1772 }
1773
1774 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1775 err = -EPIPE;
1776 goto err_unlock;
1777 }
1778
1779 unix_state_lock(other);
1780
1781 if (sock_flag(other, SOCK_DEAD) ||
1782 other->sk_shutdown & RCV_SHUTDOWN) {
1783 err = -EPIPE;
1784 goto err_state_unlock;
1785 }
1786
1787 skb = skb_peek_tail(&other->sk_receive_queue);
1788 if (tail && tail == skb) {
1789 skb = newskb;
1790 } else if (!skb) {
1791 if (newskb)
1792 skb = newskb;
1793 else
1794 goto alloc_skb;
1795 } else if (newskb) {
1796 /* fast path: the data fits into the existing tail skb, so
1797 * the speculatively allocated newskb is not needed;
1798 * releasing it here does no harm
1799 */
1800 consume_skb(newskb);
1801 }
1802
1803 if (skb_append_pagefrags(skb, page, offset, size)) {
1804 tail = skb;
1805 goto alloc_skb;
1806 }
1807
1808 skb->len += size;
1809 skb->data_len += size;
1810 skb->truesize += size;
1811 atomic_add(size, &sk->sk_wmem_alloc);
1812
1813 if (newskb)
1814 __skb_queue_tail(&other->sk_receive_queue, newskb);
1815
1816 unix_state_unlock(other);
1817 mutex_unlock(&unix_sk(other)->readlock);
1818
1819 other->sk_data_ready(other);
1820
1821 return size;
1822
1823err_state_unlock:
1824 unix_state_unlock(other);
1825err_unlock:
1826 mutex_unlock(&unix_sk(other)->readlock);
1827err:
1828 kfree_skb(newskb);
1829 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1830 send_sig(SIGPIPE, current, 0);
1831 return err;
1832}
1833
1723static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 1834static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1724 size_t len) 1835 size_t len)
1725{ 1836{
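
The 'if (false) { alloc_skb: ... }' construct in unix_stream_sendpage() above is a backward-jump allocation idiom: the function discovers while holding both locks that it needs a fresh skb, jumps back to the labelled block, drops the locks, performs the sleeping allocation, then falls through to retake the locks. A stripped-down userspace model, hypothetical names only:

#include <stdio.h>
#include <stdlib.h>

static int locked;			/* stands in for readlock + state lock */

static int do_send(void)
{
	char *newbuf = NULL;

	if (0) {			/* never entered from the top */
alloc_buf:
		locked = 0;		/* unlock before a sleeping allocation */
		newbuf = malloc(32);
		if (!newbuf)
			return -1;
	}

	locked = 1;			/* (re)acquire the locks */

	if (!newbuf)			/* only now do we know we need one */
		goto alloc_buf;

	printf("appended with fresh buffer\n");
	locked = 0;
	free(newbuf);
	return 0;
}

int main(void) { return do_send(); }

The payoff is a single locking sequence in the source even though the function may pass through it twice, once before and once after allocating.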
@@ -1860,8 +1971,9 @@ out:
1860 * Sleep until more data has arrived. But check for races.. 1971 * Sleep until more data has arrived. But check for races..
1861 */ 1972 */
1862static long unix_stream_data_wait(struct sock *sk, long timeo, 1973static long unix_stream_data_wait(struct sock *sk, long timeo,
1863 struct sk_buff *last) 1974 struct sk_buff *last, unsigned int last_len)
1864{ 1975{
1976 struct sk_buff *tail;
1865 DEFINE_WAIT(wait); 1977 DEFINE_WAIT(wait);
1866 1978
1867 unix_state_lock(sk); 1979 unix_state_lock(sk);
@@ -1869,7 +1981,9 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
1869 for (;;) { 1981 for (;;) {
1870 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1982 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1871 1983
1872 if (skb_peek_tail(&sk->sk_receive_queue) != last || 1984 tail = skb_peek_tail(&sk->sk_receive_queue);
1985 if (tail != last ||
1986 (tail && tail->len != last_len) ||
1873 sk->sk_err || 1987 sk->sk_err ||
1874 (sk->sk_shutdown & RCV_SHUTDOWN) || 1988 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1875 signal_pending(current) || 1989 signal_pending(current) ||
@@ -1897,38 +2011,50 @@ static unsigned int unix_skb_len(const struct sk_buff *skb)
1897 return skb->len - UNIXCB(skb).consumed; 2011 return skb->len - UNIXCB(skb).consumed;
1898} 2012}
1899 2013
1900static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2014struct unix_stream_read_state {
1901 size_t size, int flags) 2015 int (*recv_actor)(struct sk_buff *, int, int,
2016 struct unix_stream_read_state *);
2017 struct socket *socket;
2018 struct msghdr *msg;
2019 struct pipe_inode_info *pipe;
2020 size_t size;
2021 int flags;
2022 unsigned int splice_flags;
2023};
2024
2025static int unix_stream_read_generic(struct unix_stream_read_state *state)
1902{ 2026{
1903 struct scm_cookie scm; 2027 struct scm_cookie scm;
2028 struct socket *sock = state->socket;
1904 struct sock *sk = sock->sk; 2029 struct sock *sk = sock->sk;
1905 struct unix_sock *u = unix_sk(sk); 2030 struct unix_sock *u = unix_sk(sk);
1906 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1907 int copied = 0; 2031 int copied = 0;
2032 int flags = state->flags;
1908 int noblock = flags & MSG_DONTWAIT; 2033 int noblock = flags & MSG_DONTWAIT;
1909 int check_creds = 0; 2034 bool check_creds = false;
1910 int target; 2035 int target;
1911 int err = 0; 2036 int err = 0;
1912 long timeo; 2037 long timeo;
1913 int skip; 2038 int skip;
2039 size_t size = state->size;
2040 unsigned int last_len;
1914 2041
1915 err = -EINVAL; 2042 err = -EINVAL;
1916 if (sk->sk_state != TCP_ESTABLISHED) 2043 if (sk->sk_state != TCP_ESTABLISHED)
1917 goto out; 2044 goto out;
1918 2045
1919 err = -EOPNOTSUPP; 2046 err = -EOPNOTSUPP;
1920 if (flags&MSG_OOB) 2047 if (flags & MSG_OOB)
1921 goto out; 2048 goto out;
1922 2049
1923 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); 2050 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
1924 timeo = sock_rcvtimeo(sk, noblock); 2051 timeo = sock_rcvtimeo(sk, noblock);
1925 2052
2053 memset(&scm, 0, sizeof(scm));
2054
1926 /* Lock the socket to prevent queue disordering 2055 /* Lock the socket to prevent queue disordering
1926 1927 * while we sleep in memcpy_to_msg 2056 * while we sleep in memcpy_to_msg
1928 */ 2057 */
1929
1930 memset(&scm, 0, sizeof(scm));
1931
1932 err = mutex_lock_interruptible(&u->readlock); 2058 err = mutex_lock_interruptible(&u->readlock);
1933 if (unlikely(err)) { 2059 if (unlikely(err)) {
1934 /* recvmsg() in non blocking mode is supposed to return -EAGAIN 2060 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
@@ -1948,6 +2074,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
1948 goto unlock; 2074 goto unlock;
1949 } 2075 }
1950 last = skb = skb_peek(&sk->sk_receive_queue); 2076 last = skb = skb_peek(&sk->sk_receive_queue);
2077 last_len = last ? last->len : 0;
1951again: 2078again:
1952 if (skb == NULL) { 2079 if (skb == NULL) {
1953 unix_sk(sk)->recursion_level = 0; 2080 unix_sk(sk)->recursion_level = 0;
@@ -1970,16 +2097,17 @@ again:
1970 break; 2097 break;
1971 mutex_unlock(&u->readlock); 2098 mutex_unlock(&u->readlock);
1972 2099
1973 timeo = unix_stream_data_wait(sk, timeo, last); 2100 timeo = unix_stream_data_wait(sk, timeo, last,
2101 last_len);
1974 2102
1975 if (signal_pending(current) 2103 if (signal_pending(current) ||
1976 || mutex_lock_interruptible(&u->readlock)) { 2104 mutex_lock_interruptible(&u->readlock)) {
1977 err = sock_intr_errno(timeo); 2105 err = sock_intr_errno(timeo);
1978 goto out; 2106 goto out;
1979 } 2107 }
1980 2108
1981 continue; 2109 continue;
1982 unlock: 2110unlock:
1983 unix_state_unlock(sk); 2111 unix_state_unlock(sk);
1984 break; 2112 break;
1985 } 2113 }
@@ -1988,6 +2116,7 @@ again:
1988 while (skip >= unix_skb_len(skb)) { 2116 while (skip >= unix_skb_len(skb)) {
1989 skip -= unix_skb_len(skb); 2117 skip -= unix_skb_len(skb);
1990 last = skb; 2118 last = skb;
2119 last_len = skb->len;
1991 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2120 skb = skb_peek_next(skb, &sk->sk_receive_queue);
1992 if (!skb) 2121 if (!skb)
1993 goto again; 2122 goto again;
@@ -1999,23 +2128,27 @@ again:
1999 /* Never glue messages from different writers */ 2128 /* Never glue messages from different writers */
2000 if ((UNIXCB(skb).pid != scm.pid) || 2129 if ((UNIXCB(skb).pid != scm.pid) ||
2001 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || 2130 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
2002 !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) 2131 !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
2132 !unix_secdata_eq(&scm, skb))
2003 break; 2133 break;
2004 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2134 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2005 /* Copy credentials */ 2135 /* Copy credentials */
2006 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2136 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2007 check_creds = 1; 2137 unix_set_secdata(&scm, skb);
2138 check_creds = true;
2008 } 2139 }
2009 2140
2010 /* Copy address just once */ 2141 /* Copy address just once */
2011 if (sunaddr) { 2142 if (state->msg && state->msg->msg_name) {
2012 unix_copy_addr(msg, skb->sk); 2143 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2144 state->msg->msg_name);
2145 unix_copy_addr(state->msg, skb->sk);
2013 sunaddr = NULL; 2146 sunaddr = NULL;
2014 } 2147 }
2015 2148
2016 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2149 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2017 if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2150 chunk = state->recv_actor(skb, skip, chunk, state);
2018 msg, chunk)) { 2151 if (chunk < 0) {
2019 if (copied == 0) 2152 if (copied == 0)
2020 copied = -EFAULT; 2153 copied = -EFAULT;
2021 break; 2154 break;
@@ -2053,11 +2186,85 @@ again:
2053 } while (size); 2186 } while (size);
2054 2187
2055 mutex_unlock(&u->readlock); 2188 mutex_unlock(&u->readlock);
2056 scm_recv(sock, msg, &scm, flags); 2189 if (state->msg)
2190 scm_recv(sock, state->msg, &scm, flags);
2191 else
2192 scm_destroy(&scm);
2057out: 2193out:
2058 return copied ? : err; 2194 return copied ? : err;
2059} 2195}
2060 2196
2197static int unix_stream_read_actor(struct sk_buff *skb,
2198 int skip, int chunk,
2199 struct unix_stream_read_state *state)
2200{
2201 int ret;
2202
2203 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2204 state->msg, chunk);
2205 return ret ?: chunk;
2206}
2207
2208static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2209 size_t size, int flags)
2210{
2211 struct unix_stream_read_state state = {
2212 .recv_actor = unix_stream_read_actor,
2213 .socket = sock,
2214 .msg = msg,
2215 .size = size,
2216 .flags = flags
2217 };
2218
2219 return unix_stream_read_generic(&state);
2220}
2221
2222static ssize_t skb_unix_socket_splice(struct sock *sk,
2223 struct pipe_inode_info *pipe,
2224 struct splice_pipe_desc *spd)
2225{
2226 int ret;
2227 struct unix_sock *u = unix_sk(sk);
2228
2229 mutex_unlock(&u->readlock);
2230 ret = splice_to_pipe(pipe, spd);
2231 mutex_lock(&u->readlock);
2232
2233 return ret;
2234}
2235
2236static int unix_stream_splice_actor(struct sk_buff *skb,
2237 int skip, int chunk,
2238 struct unix_stream_read_state *state)
2239{
2240 return skb_splice_bits(skb, state->socket->sk,
2241 UNIXCB(skb).consumed + skip,
2242 state->pipe, chunk, state->splice_flags,
2243 skb_unix_socket_splice);
2244}
2245
2246static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2247 struct pipe_inode_info *pipe,
2248 size_t size, unsigned int flags)
2249{
2250 struct unix_stream_read_state state = {
2251 .recv_actor = unix_stream_splice_actor,
2252 .socket = sock,
2253 .pipe = pipe,
2254 .size = size,
2255 .splice_flags = flags,
2256 };
2257
2258 if (unlikely(*ppos))
2259 return -ESPIPE;
2260
2261 if (sock->file->f_flags & O_NONBLOCK ||
2262 flags & SPLICE_F_NONBLOCK)
2263 state.flags = MSG_DONTWAIT;
2264
2265 return unix_stream_read_generic(&state);
2266}
2267
2061static int unix_shutdown(struct socket *sock, int mode) 2268static int unix_shutdown(struct socket *sock, int mode)
2062{ 2269{
2063 struct sock *sk = sock->sk; 2270 struct sock *sk = sock->sk;
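
The largest af_unix change above factors unix_stream_recvmsg() and the new unix_stream_splice_read() into one walker, unix_stream_read_generic(), parameterised by a recv_actor callback: one actor copies each chunk into a user buffer, the other splices it into a pipe. A hypothetical userspace sketch of the same shape:

#include <stdio.h>
#include <string.h>

struct read_state;
typedef int (*recv_actor_t)(const char *chunk, int len,
			    struct read_state *st);

struct read_state {
	recv_actor_t recv_actor;
	char *dst;			/* used only by the copy actor */
	int copied;
};

static int copy_actor(const char *chunk, int len, struct read_state *st)
{
	memcpy(st->dst + st->copied, chunk, len);
	return len;			/* bytes consumed */
}

static int print_actor(const char *chunk, int len, struct read_state *st)
{
	(void)st;			/* "splice" straight to stdout */
	return (int)fwrite(chunk, 1, (size_t)len, stdout);
}

/* one generic loop, two behaviours */
static int read_generic(struct read_state *st, const char **queue, int n)
{
	for (int i = 0; i < n; i++) {
		int ret = st->recv_actor(queue[i], (int)strlen(queue[i]), st);

		if (ret < 0)
			return ret;
		st->copied += ret;
	}
	return st->copied;
}

int main(void)
{
	const char *queue[] = { "hello ", "world\n" };
	char buf[64] = "";
	struct read_state rx = { .recv_actor = copy_actor, .dst = buf };
	struct read_state sp = { .recv_actor = print_actor };

	read_generic(&rx, queue, 2);
	printf("copied: %s", buf);
	return read_generic(&sp, queue, 2) < 0;
}

All the queue walking, credential checks and locking stay in one place; only the per-chunk data movement differs between the two entry points.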
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2ec86e652a19..df5fc6b340f1 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -581,13 +581,14 @@ struct sock *__vsock_create(struct net *net,
581 struct socket *sock, 581 struct socket *sock,
582 struct sock *parent, 582 struct sock *parent,
583 gfp_t priority, 583 gfp_t priority,
584 unsigned short type) 584 unsigned short type,
585 int kern)
585{ 586{
586 struct sock *sk; 587 struct sock *sk;
587 struct vsock_sock *psk; 588 struct vsock_sock *psk;
588 struct vsock_sock *vsk; 589 struct vsock_sock *vsk;
589 590
590 sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); 591 sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern);
591 if (!sk) 592 if (!sk)
592 return NULL; 593 return NULL;
593 594
@@ -1866,7 +1867,7 @@ static int vsock_create(struct net *net, struct socket *sock,
1866 1867
1867 sock->state = SS_UNCONNECTED; 1868 sock->state = SS_UNCONNECTED;
1868 1869
1869 return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; 1870 return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM;
1870} 1871}
1871 1872
1872static const struct net_proto_family vsock_family_ops = { 1873static const struct net_proto_family vsock_family_ops = {
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c294da095461..1f63daff3965 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1022,7 +1022,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
1022 } 1022 }
1023 1023
1024 pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, 1024 pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
1025 sk->sk_type); 1025 sk->sk_type, 0);
1026 if (!pending) { 1026 if (!pending) {
1027 vmci_transport_send_reset(sk, pkt); 1027 vmci_transport_send_reset(sk, pkt);
1028 return -ENOMEM; 1028 return -ENOMEM;
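
The vsock hunks (like the tipc and af_unix ones earlier) thread a new 'kern' argument from the protocol-family create hook down to sk_alloc(), so the core can tell kernel-internal sockets from user-created ones; vmci_transport passes 0 for sockets it spawns for incoming connections. A trivial userspace model of the plumbing, hypothetical names only:

#include <stdio.h>
#include <stdlib.h>

struct sock_model { int kern; };

static struct sock_model *sk_alloc_model(int kern)
{
	struct sock_model *sk = calloc(1, sizeof(*sk));

	if (sk)
		sk->kern = kern;	/* provenance recorded at allocation */
	return sk;
}

static struct sock_model *vsock_create_model(int kern)
{
	return sk_alloc_model(kern);	/* the flag threads straight through */
}

int main(void)
{
	struct sock_model *user = vsock_create_model(0); /* syscall path */
	struct sock_model *kern = vsock_create_model(1); /* in-kernel path */

	if (!user || !kern)
		return 1;
	printf("user=%d kern=%d\n", user->kern, kern->kern);
	free(user);
	free(kern);
	return 0;
}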
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 7aaf7415dc4c..915b328b9ac5 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
698EXPORT_SYMBOL(cfg80211_chandef_usable); 698EXPORT_SYMBOL(cfg80211_chandef_usable);
699 699
700/* 700/*
701 * For GO only, check if the channel can be used under permissive conditions 701 * Check if the channel can be used under permissive conditions mandated by
702 * mandated by the some regulatory bodies, i.e., the channel is marked with 702 * some regulatory bodies, i.e., the channel is marked with
703 * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface 703 * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface
704 * associated to an AP on the same channel or on the same UNII band 704 * associated to an AP on the same channel or on the same UNII band
705 * (assuming that the AP is an authorized master). 705 * (assuming that the AP is an authorized master).
706 * In addition allow the GO to operate on a channel on which indoor operation is 706 * In addition allow operation on a channel on which indoor operation is
707 * allowed, iff we are currently operating in an indoor environment. 707 * allowed, iff we are currently operating in an indoor environment.
708 */ 708 */
709static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, 709static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
710 enum nl80211_iftype iftype,
710 struct ieee80211_channel *chan) 711 struct ieee80211_channel *chan)
711{ 712{
712 struct wireless_dev *wdev_iter; 713 struct wireless_dev *wdev;
713 struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); 714 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
714 715
715 ASSERT_RTNL(); 716 ASSERT_RTNL();
716 717
@@ -718,32 +719,48 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
718 !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) 719 !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
719 return false; 720 return false;
720 721
722 /* only valid for GO and TDLS off-channel (station/p2p-CL) */
723 if (iftype != NL80211_IFTYPE_P2P_GO &&
724 iftype != NL80211_IFTYPE_STATION &&
725 iftype != NL80211_IFTYPE_P2P_CLIENT)
726 return false;
727
721 if (regulatory_indoor_allowed() && 728 if (regulatory_indoor_allowed() &&
722 (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) 729 (chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
723 return true; 730 return true;
724 731
725 if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) 732 if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT))
726 return false; 733 return false;
727 734
728 /* 735 /*
729 * Generally, it is possible to rely on another device/driver to allow 736 * Generally, it is possible to rely on another device/driver to allow
730 * the GO concurrent relaxation, however, since the device can further 737 * the IR concurrent relaxation, however, since the device can further
 731 * enforce the relaxation (by doing similar verifications to this), 738 * enforce the relaxation (by doing similar verifications to this),
732 * and thus fail the GO instantiation, consider only the interfaces of 739 * and thus fail the GO instantiation, consider only the interfaces of
733 * the current registered device. 740 * the current registered device.
734 */ 741 */
735 list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { 742 list_for_each_entry(wdev, &rdev->wdev_list, list) {
736 struct ieee80211_channel *other_chan = NULL; 743 struct ieee80211_channel *other_chan = NULL;
737 int r1, r2; 744 int r1, r2;
738 745
739 if (wdev_iter->iftype != NL80211_IFTYPE_STATION || 746 wdev_lock(wdev);
740 !netif_running(wdev_iter->netdev)) 747 if (wdev->iftype == NL80211_IFTYPE_STATION &&
741 continue; 748 wdev->current_bss)
742 749 other_chan = wdev->current_bss->pub.channel;
743 wdev_lock(wdev_iter); 750
744 if (wdev_iter->current_bss) 751 /*
745 other_chan = wdev_iter->current_bss->pub.channel; 752 * If a GO already operates on the same GO_CONCURRENT channel,
746 wdev_unlock(wdev_iter); 753 * this one (maybe the same one) can beacon as well. We allow
754 * the operation even if the station we relied on with
755 * GO_CONCURRENT is disconnected now. But then we must make sure
756 * we're not outdoor on an indoor-only channel.
757 */
758 if (iftype == NL80211_IFTYPE_P2P_GO &&
759 wdev->iftype == NL80211_IFTYPE_P2P_GO &&
760 wdev->beacon_interval &&
761 !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
762 other_chan = wdev->chandef.chan;
763 wdev_unlock(wdev);
747 764
748 if (!other_chan) 765 if (!other_chan)
749 continue; 766 continue;
@@ -784,7 +801,6 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
784 struct cfg80211_chan_def *chandef, 801 struct cfg80211_chan_def *chandef,
785 enum nl80211_iftype iftype) 802 enum nl80211_iftype iftype)
786{ 803{
787 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
788 bool res; 804 bool res;
789 u32 prohibited_flags = IEEE80211_CHAN_DISABLED | 805 u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
790 IEEE80211_CHAN_RADAR; 806 IEEE80211_CHAN_RADAR;
@@ -792,13 +808,12 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
792 trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); 808 trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
793 809
794 /* 810 /*
795 * Under certain conditions suggested by the some regulatory bodies 811 * Under certain conditions suggested by some regulatory bodies a
796 * a GO can operate on channels marked with IEEE80211_NO_IR 812 * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag
797 * so set this flag only if such relaxations are not enabled and 813 * only if such relaxations are not enabled and the conditions are not
798 * the conditions are not met. 814 * met.
799 */ 815 */
800 if (iftype != NL80211_IFTYPE_P2P_GO || 816 if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan))
801 !cfg80211_go_permissive_chan(rdev, chandef->chan))
802 prohibited_flags |= IEEE80211_CHAN_NO_IR; 817 prohibited_flags |= IEEE80211_CHAN_NO_IR;
803 818
804 if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && 819 if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
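
cfg80211_reg_can_beacon() above builds a mask of always-prohibited channel flags and adds IEEE80211_CHAN_NO_IR only when the now interface-type-aware permissive check fails. A reduced sketch of the flag logic, with hypothetical constants and predicate:

#include <stdbool.h>
#include <stdio.h>

#define CHAN_DISABLED	0x1u
#define CHAN_RADAR	0x2u
#define CHAN_NO_IR	0x4u

/* stands in for cfg80211_ir_permissive_chan() */
static bool ir_permissive(bool relax_enabled, bool concurrent_ok)
{
	return relax_enabled && concurrent_ok;
}

static bool can_beacon(unsigned int chan_flags, bool relax, bool concurrent)
{
	unsigned int prohibited = CHAN_DISABLED | CHAN_RADAR;

	if (!ir_permissive(relax, concurrent))
		prohibited |= CHAN_NO_IR;	/* no relaxation applies */

	return !(chan_flags & prohibited);
}

int main(void)
{
	/* a NO_IR channel is usable only under the relaxation */
	printf("%d\n", can_beacon(CHAN_NO_IR, true, true));	/* 1 */
	printf("%d\n", can_beacon(CHAN_NO_IR, false, true));	/* 0 */
	return 0;
}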
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 801cd49c5a0c..311eef26bf88 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -222,6 +222,7 @@ struct cfg80211_event {
222 const u8 *ie; 222 const u8 *ie;
223 size_t ie_len; 223 size_t ie_len;
224 u16 reason; 224 u16 reason;
225 bool locally_generated;
225 } dc; 226 } dc;
226 struct { 227 struct {
227 u8 bssid[ETH_ALEN]; 228 u8 bssid[ETH_ALEN];
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index dd78445c7d50..c264effd00a6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
639 if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && 639 if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
640 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) 640 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
641 goto nla_put_failure; 641 goto nla_put_failure;
642 if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && 642 if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) &&
643 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) 643 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT))
644 goto nla_put_failure; 644 goto nla_put_failure;
645 if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && 645 if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
646 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) 646 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
@@ -4061,7 +4061,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
4061 return -EINVAL; 4061 return -EINVAL;
4062 break; 4062 break;
4063 case CFG80211_STA_MESH_PEER_USER: 4063 case CFG80211_STA_MESH_PEER_USER:
4064 if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) 4064 if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION &&
4065 params->plink_action != NL80211_PLINK_ACTION_BLOCK)
4065 return -EINVAL; 4066 return -EINVAL;
4066 break; 4067 break;
4067 } 4068 }
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0e347f888fe9..d359e0610198 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags)
 		channel_flags |= IEEE80211_CHAN_NO_OFDM;
 	if (rd_flags & NL80211_RRF_NO_OUTDOOR)
 		channel_flags |= IEEE80211_CHAN_INDOOR_ONLY;
-	if (rd_flags & NL80211_RRF_GO_CONCURRENT)
-		channel_flags |= IEEE80211_CHAN_GO_CONCURRENT;
+	if (rd_flags & NL80211_RRF_IR_CONCURRENT)
+		channel_flags |= IEEE80211_CHAN_IR_CONCURRENT;
 	if (rd_flags & NL80211_RRF_NO_HT40MINUS)
 		channel_flags |= IEEE80211_CHAN_NO_HT40MINUS;
 	if (rd_flags & NL80211_RRF_NO_HT40PLUS)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d11454f87bac..8020b5b094d4 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -938,7 +938,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 }
 
 void cfg80211_disconnected(struct net_device *dev, u16 reason,
-			   const u8 *ie, size_t ie_len, gfp_t gfp)
+			   const u8 *ie, size_t ie_len,
+			   bool locally_generated, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -954,6 +955,7 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason,
 	ev->dc.ie_len = ie_len;
 	memcpy((void *)ev->dc.ie, ie, ie_len);
 	ev->dc.reason = reason;
+	ev->dc.locally_generated = locally_generated;
 
 	spin_lock_irqsave(&wdev->event_lock, flags);
 	list_add_tail(&ev->list, &wdev->event_list);
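Note: cfg80211_disconnected() gains a locally_generated flag so drivers can say whether the link loss was initiated by the local host or by the peer; the flag is stashed in the dc event (see the core.h hunk above) for deferred processing. A hypothetical driver reporting a deauth received from the AP would call:

	/* Sketch: not locally generated - the AP kicked us out. */
	cfg80211_disconnected(netdev, WLAN_REASON_DEAUTH_LEAVING,
			      NULL, 0, false, GFP_KERNEL);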
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9ee6bc1a7610..9cee0220665d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -86,7 +86,7 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
 {
 	struct wireless_dev *wdev;
@@ -95,7 +95,7 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
 		cfg80211_leave(rdev, wdev);
 }
 
-static int wiphy_suspend(struct device *dev, pm_message_t state)
+static int wiphy_suspend(struct device *dev)
 {
 	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
 	int ret = 0;
@@ -136,6 +136,11 @@ static int wiphy_resume(struct device *dev)
 
 	return ret;
 }
+
+static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume);
+#define WIPHY_PM_OPS (&wiphy_pm_ops)
+#else
+#define WIPHY_PM_OPS NULL
 #endif
 
 static const void *wiphy_namespace(struct device *d)
@@ -151,10 +156,7 @@ struct class ieee80211_class = {
 	.dev_release = wiphy_dev_release,
 	.dev_groups = ieee80211_groups,
 	.dev_uevent = wiphy_uevent,
-#ifdef CONFIG_PM
-	.suspend = wiphy_suspend,
-	.resume = wiphy_resume,
-#endif
+	.pm = WIPHY_PM_OPS,
 	.ns_type = &net_ns_type_operations,
 	.namespace = wiphy_namespace,
 };
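Note: this converts the wiphy class from the legacy class suspend/resume callbacks (with their pm_message_t argument) to dev_pm_ops, keyed off CONFIG_PM_SLEEP rather than CONFIG_PM. SIMPLE_DEV_PM_OPS() wires the two callbacks into all system-sleep transitions; roughly (a sketch of the macro's effect, not the literal expansion):

static const struct dev_pm_ops wiphy_pm_ops = {
	.suspend  = wiphy_suspend,
	.resume   = wiphy_resume,
	.freeze   = wiphy_suspend,
	.thaw     = wiphy_resume,
	.poweroff = wiphy_suspend,
	.restore  = wiphy_resume,
};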
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 70051ab52f4f..baf7218cec15 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -887,7 +887,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
 		case EVENT_DISCONNECTED:
 			__cfg80211_disconnected(wdev->netdev,
 						ev->dc.ie, ev->dc.ie_len,
-						ev->dc.reason, true);
+						ev->dc.reason,
+						!ev->dc.locally_generated);
 			break;
 		case EVENT_IBSS_JOINED:
 			__cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid,
@@ -944,7 +945,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 	     ntype == NL80211_IFTYPE_P2P_CLIENT))
 		return -EBUSY;
 
-	if (ntype != otype && netif_running(dev)) {
+	if (ntype != otype) {
 		dev->ieee80211_ptr->use_4addr = false;
 		dev->ieee80211_ptr->mesh_id_up_len = 0;
 		wdev_lock(dev->ieee80211_ptr);
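Note: two independent fixes above. First, queued EVENT_DISCONNECTED processing now derives the SME's final boolean (from_ap) from the stored flag instead of hard-coding true; in effect (sketch):

	/* A locally generated disconnect is, by definition, not from the AP. */
	__cfg80211_disconnected(wdev->netdev, ev->dc.ie, ev->dc.ie_len,
				ev->dc.reason, !ev->dc.locally_generated);

Second, cfg80211_change_iface() now resets use_4addr and the mesh ID on any interface-type change, not only while the netdev is running, so stale state cannot survive a type change made while the interface is down.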
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index c3ab230e4493..a750f330b8dd 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -515,10 +515,10 @@ static struct proto x25_proto = {
 	.obj_size = sizeof(struct x25_sock),
 };
 
-static struct sock *x25_alloc_socket(struct net *net)
+static struct sock *x25_alloc_socket(struct net *net, int kern)
 {
 	struct x25_sock *x25;
-	struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto);
+	struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern);
 
 	if (!sk)
 		goto out;
@@ -553,7 +553,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
 		goto out;
 
 	rc = -ENOBUFS;
-	if ((sk = x25_alloc_socket(net)) == NULL)
+	if ((sk = x25_alloc_socket(net, kern)) == NULL)
 		goto out;
 
 	x25 = x25_sk(sk);
@@ -602,7 +602,7 @@ static struct sock *x25_make_new(struct sock *osk)
 	if (osk->sk_type != SOCK_SEQPACKET)
 		goto out;
 
-	if ((sk = x25_alloc_socket(sock_net(osk))) == NULL)
+	if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL)
 		goto out;
 
 	x25 = x25_sk(sk);
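Note: this is the x25 piece of the tree-wide change that threads the kern flag from the protocol family's ->create() hook into sk_alloc(), letting the core (and LSMs) distinguish kernel-internal sockets from user ones. x25_make_new() passes 0 because sockets cloned for accept() belong to userspace. Sketch:

	/* kern is non-zero only for sockets created inside the kernel,
	 * e.g. via sock_create_kern(); user socket(2) calls pass 0. */
	struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern);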
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 12e82a5e4ad5..42f7c76cf853 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -31,6 +31,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqniv",
 			.icv_truncbits = 64,
 		}
 	},
@@ -49,6 +50,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqniv",
 			.icv_truncbits = 96,
 		}
 	},
@@ -67,6 +69,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqniv",
 			.icv_truncbits = 128,
 		}
 	},
@@ -85,6 +88,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqniv",
 			.icv_truncbits = 64,
 		}
 	},
@@ -103,6 +107,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqniv",
 			.icv_truncbits = 96,
 		}
 	},
@@ -121,6 +126,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqniv",
 			.icv_truncbits = 128,
 		}
 	},
@@ -139,6 +145,7 @@ static struct xfrm_algo_desc aead_list[] = {
 
 	.uinfo = {
 		.aead = {
+			.geniv = "seqiv",
 			.icv_truncbits = 128,
 		}
 	},
@@ -152,6 +159,18 @@ static struct xfrm_algo_desc aead_list[] = {
 		.sadb_alg_maxbits = 256
 	}
 },
+{
+	.name = "rfc7539esp(chacha20,poly1305)",
+
+	.uinfo = {
+		.aead = {
+			.geniv = "seqniv",
+			.icv_truncbits = 128,
+		}
+	},
+
+	.pfkey_supported = 0,
+},
 };
 
 static struct xfrm_algo_desc aalg_list[] = {
@@ -353,6 +372,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 64,
 			.defkeybits = 64,
 		}
@@ -373,6 +393,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 64,
 			.defkeybits = 192,
 		}
@@ -393,6 +414,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 64,
 			.defkeybits = 128,
 		}
@@ -413,6 +435,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 64,
 			.defkeybits = 128,
 		}
@@ -433,6 +456,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 128,
 			.defkeybits = 128,
 		}
@@ -453,6 +477,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 128,
 			.defkeybits = 128,
 		}
@@ -473,6 +498,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 128,
 			.defkeybits = 128,
 		}
@@ -493,6 +519,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "echainiv",
 			.blockbits = 128,
 			.defkeybits = 128,
 		}
@@ -512,6 +539,7 @@ static struct xfrm_algo_desc ealg_list[] = {
 
 	.uinfo = {
 		.encr = {
+			.geniv = "seqiv",
 			.blockbits = 128,
 			.defkeybits = 160, /* 128-bit key + 32-bit nonce */
 		}
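Note: each algorithm entry now records the IV-generator template (geniv) that should wrap it, instead of leaving the choice to crypto-layer defaults, and a new rfc7539esp(chacha20,poly1305) AEAD entry is added (not exposed via PF_KEY). The ESP code can then compose the full transform name from the table; roughly (a sketch, variable names illustrative):

	char name[CRYPTO_MAX_ALG_NAME];

	/* e.g. "echainiv(cbc(aes))" or "seqniv(rfc4106(gcm(aes)))" */
	snprintf(name, sizeof(name), "%s(%s)", x->geniv, x->ealg->alg_name);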
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b58286ecd156..60ce7014e1b0 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -31,7 +31,7 @@ int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
 		return -EAFNOSUPPORT;
 	spin_lock_bh(&xfrm_input_afinfo_lock);
 	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
-		err = -ENOBUFS;
+		err = -EEXIST;
 	else
 		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
 	spin_unlock_bh(&xfrm_input_afinfo_lock);
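Note: registering a second afinfo for the same family now fails with -EEXIST, the conventional errno for a duplicate registration, rather than the misleading -ENOBUFS; the same substitution appears in xfrm_policy.c and xfrm_state.c below. Callers can now tell the cases apart (sketch, my_afinfo hypothetical):

	err = xfrm_input_register_afinfo(&my_afinfo);
	if (err == -EEXIST)
		pr_warn("xfrm input afinfo for this family already registered\n");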
@@ -254,13 +254,13 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		skb->sp->xvec[skb->sp->len++] = x;
 
 		spin_lock(&x->lock);
-		if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
-			goto drop_unlock;
-		}
 
 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID);
+			if (x->km.state == XFRM_STATE_ACQ)
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+			else
+				XFRM_INC_STATS(net,
+					       LINUX_MIB_XFRMINSTATEINVALID);
 			goto drop_unlock;
 		}
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index fbcedbe33190..68ada2ca4b60 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -38,6 +38,18 @@ static int xfrm_skb_check_space(struct sk_buff *skb)
 	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
 }
 
+/* Children define the path of the packet through the
+ * Linux networking. Thus, destinations are stackable.
+ */
+
+static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
+{
+	struct dst_entry *child = dst_clone(skb_dst(skb)->child);
+
+	skb_dst_drop(skb);
+	return child;
+}
+
 static int xfrm_output_one(struct sk_buff *skb, int err)
 {
 	struct dst_entry *dst = skb_dst(skb);
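Note: skb_dst_pop() grabs a reference to the child dst and drops the skb's current one; xfrm uses it to step down the stacked dst chain, one transform per level. Typical use in the output path (sketch):

	/* Move the skb from the current (xfrm) dst to the dst it was
	 * stacked on, transferring the reference we just took. */
	skb_dst_set(skb, skb_dst_pop(skb));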
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 638af0655aaf..18cead7645be 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -315,14 +315,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
-static void xfrm_queue_purge(struct sk_buff_head *list)
-{
-	struct sk_buff *skb;
-
-	while ((skb = skb_dequeue(list)) != NULL)
-		kfree_skb(skb);
-}
-
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
@@ -335,7 +327,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 
 	if (del_timer(&policy->polq.hold_timer))
 		xfrm_pol_put(policy);
-	xfrm_queue_purge(&policy->polq.hold_queue);
+	skb_queue_purge(&policy->polq.hold_queue);
 
 	if (del_timer(&policy->timer))
 		xfrm_pol_put(policy);
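Note: the removed xfrm_queue_purge() duplicated an existing core helper; skb_queue_purge() performs exactly the same loop (taking the queue lock for each dequeue):

	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL)
		kfree_skb(skb);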
@@ -708,6 +700,9 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
 	struct xfrm_policy_queue *pq = &old->polq;
 	struct sk_buff_head list;
 
+	if (skb_queue_empty(&pq->hold_queue))
+		return;
+
 	__skb_queue_head_init(&list);
 
 	spin_lock_bh(&pq->hold_queue.lock);
@@ -716,9 +711,6 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
 		xfrm_pol_put(old);
 	spin_unlock_bh(&pq->hold_queue.lock);
 
-	if (skb_queue_empty(&list))
-		return;
-
 	pq = &new->polq;
 
 	spin_lock_bh(&pq->hold_queue.lock);
@@ -1012,7 +1004,9 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 	if (list_empty(&walk->walk.all))
 		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
 	else
-		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
+		x = list_first_entry(&walk->walk.all,
+				     struct xfrm_policy_walk_entry, all);
+
 	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
 		if (x->dead)
 			continue;
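Note: resuming a walk previously converted the walk's own list anchor into a (bogus) entry with list_entry(); it was only harmless because walk anchors are marked dead and skipped. list_first_entry() instead starts from the node after the anchor, since it is defined as:

#define list_first_entry(ptr, type, member) \
	list_entry((ptr)->next, type, member)

The same fix is applied to xfrm_state_walk() in xfrm_state.c below.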
@@ -1120,6 +1114,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	}
 	chain = &net->xfrm.policy_inexact[dir];
 	hlist_for_each_entry(pol, chain, bydst) {
+		if ((pol->priority >= priority) && ret)
+			break;
+
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
 			if (err == -ESRCH)
@@ -1128,13 +1125,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 				ret = ERR_PTR(err);
 				goto fail;
 			}
-		} else if (pol->priority < priority) {
+		} else {
 			ret = pol;
 			break;
 		}
 	}
-	if (ret)
-		xfrm_pol_hold(ret);
+
+	xfrm_pol_hold(ret);
 fail:
 	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
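Note: the inexact policy chain is kept sorted by ascending priority (lower value wins), so once the hashed lookup has produced a candidate in ret, the walk can stop at the first chain node that cannot beat it; the per-match pol->priority < priority test then becomes redundant. The invariant being relied on (sketch):

	/* Assumed chain invariant: for consecutive nodes a then b,
	 * a->priority <= b->priority, so nothing after the break can win. */
	if ((pol->priority >= priority) && ret)
		break;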
@@ -1955,7 +1952,7 @@ out:
 
 purge_queue:
 	pq->timeout = 0;
-	xfrm_queue_purge(&pq->hold_queue);
+	skb_queue_purge(&pq->hold_queue);
 	xfrm_pol_put(pol);
 }
 
@@ -2814,7 +2811,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 		return -EAFNOSUPPORT;
 	spin_lock(&xfrm_policy_afinfo_lock);
 	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
-		err = -ENOBUFS;
+		err = -EEXIST;
 	else {
 		struct dst_ops *dst_ops = afinfo->dst_ops;
 		if (likely(dst_ops->kmem_cachep == NULL))
@@ -3209,16 +3206,17 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
 	}
 	chain = &net->xfrm.policy_inexact[dir];
 	hlist_for_each_entry(pol, chain, bydst) {
+		if ((pol->priority >= priority) && ret)
+			break;
+
 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
-		    pol->type == type &&
-		    pol->priority < priority) {
+		    pol->type == type) {
 			ret = pol;
 			break;
 		}
 	}
 
-	if (ret)
-		xfrm_pol_hold(ret);
+	xfrm_pol_hold(ret);
 
 	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 96688cd0f6f1..9895a8c56d8c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1626,7 +1626,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 	if (list_empty(&walk->all))
 		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
 	else
-		x = list_entry(&walk->all, struct xfrm_state_walk, all);
+		x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
 	list_for_each_entry_from(x, &net->xfrm.state_all, all) {
 		if (x->state == XFRM_STATE_DEAD)
 			continue;
@@ -1908,7 +1908,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 		return -EAFNOSUPPORT;
 	spin_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
-		err = -ENOBUFS;
+		err = -EEXIST;
 	else
 		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
 	spin_unlock_bh(&xfrm_state_afinfo_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2091664295ba..bd16c6c7e1e7 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -289,6 +289,31 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
 	return 0;
 }
 
+static int attach_crypt(struct xfrm_state *x, struct nlattr *rta)
+{
+	struct xfrm_algo *p, *ualg;
+	struct xfrm_algo_desc *algo;
+
+	if (!rta)
+		return 0;
+
+	ualg = nla_data(rta);
+
+	algo = xfrm_ealg_get_byname(ualg->alg_name, 1);
+	if (!algo)
+		return -ENOSYS;
+	x->props.ealgo = algo->desc.sadb_alg_id;
+
+	p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	strcpy(p->alg_name, algo->name);
+	x->ealg = p;
+	x->geniv = algo->uinfo.encr.geniv;
+	return 0;
+}
+
 static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props,
 		       struct nlattr *rta)
 {
@@ -349,8 +374,7 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
 	return 0;
 }
 
-static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
-		       struct nlattr *rta)
+static int attach_aead(struct xfrm_state *x, struct nlattr *rta)
 {
 	struct xfrm_algo_aead *p, *ualg;
 	struct xfrm_algo_desc *algo;
@@ -363,14 +387,15 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
 	algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1);
 	if (!algo)
 		return -ENOSYS;
-	*props = algo->desc.sadb_alg_id;
+	x->props.ealgo = algo->desc.sadb_alg_id;
 
 	p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
 	strcpy(p->alg_name, algo->name);
-	*algpp = p;
+	x->aead = p;
+	x->geniv = algo->uinfo.aead.geniv;
 	return 0;
 }
 
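Note: attach_aead() (and the new attach_crypt()) now take the xfrm_state itself so they can record the geniv template alongside the algorithm, instead of writing through output pointers. After construction the state carries everything needed to name the transform; e.g. (sketch, illustrative only):

	pr_debug("SA transform: %s(%s)\n", x->geniv,
		 x->aead ? x->aead->alg_name : x->ealg->alg_name);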
@@ -515,8 +540,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	if (attrs[XFRMA_SA_EXTRA_FLAGS])
 		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
 
-	if ((err = attach_aead(&x->aead, &x->props.ealgo,
-			       attrs[XFRMA_ALG_AEAD])))
+	if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD])))
 		goto error;
 	if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo,
 				     attrs[XFRMA_ALG_AUTH_TRUNC])))
@@ -526,9 +550,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 				       attrs[XFRMA_ALG_AUTH])))
 			goto error;
 	}
-	if ((err = attach_one_algo(&x->ealg, &x->props.ealgo,
-				   xfrm_ealg_get_byname,
-				   attrs[XFRMA_ALG_CRYPT])))
+	if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT])))
 		goto error;
 	if ((err = attach_one_algo(&x->calg, &x->props.calgo,
 				   xfrm_calg_get_byname,